class EventsTestMixin(object):
    """
    Mixin with setup and an assertion helper for tests that inspect emitted events.
    """

    def setUp(self):
        """Bind the ``test.events`` collection, empty it, and record the start time."""
        super(EventsTestMixin, self).setUp()
        events = MongoClient()["test"]["events"]
        self.event_collection = events
        self.event_collection.drop()
        self.start_time = datetime.now()

    def assert_event_emitted_num_times(self, event_name, event_time, event_user_id, num_times_emitted):
        """
        Assert how often a particular event was recorded after a given time.

        :param event_name: Expected event name (e.g., "edx.course.enrollment.activated")
        :param event_time: Only events with a "time" strictly greater than this
            are counted (e.g., the beginning of the test case)
        :param event_user_id: user_id expected in the event; converted with int()
        :param num_times_emitted: number of matching events expected
        """
        query = {
            "name": event_name,
            "time": {"$gt": event_time},
            "event.user_id": int(event_user_id),
        }
        matching_events = self.event_collection.find(query)
        self.assertEqual(matching_events.count(), num_times_emitted)
class TestMongoRouter(unittest.TestCase):
    """Integration test for ``MongoRouter`` against the default local MongoDB."""

    def setUp(self):
        # Direct handle on test_db.test_db, used to seed data and to clean up.
        self.test_collection = MongoClient()["test_db"]["test_db"]

    def tearDown(self):
        self.test_collection.drop()

    def test_route(self):
        """A document is readable and writable through ``router.route("test")``."""
        self.test_collection.insert_one({"test_id": "tid", "test": "success"})
        router = MongoRouter()

        # Document seeded directly must be visible through the routed collection.
        seeded = router.route("test").find_one({"test_id": "tid"})
        # Fail with a clear assertion instead of AttributeError on None.
        self.assertIsNotNone(seeded)
        self.assertEqual("success", seeded.get("test", None))

        # Document written through the router must be readable through it.
        router.route("test").insert_one({"test_id": "tid_2", "test": "success"})
        routed = router.route("test").find_one({"test_id": "tid_2"})
        self.assertIsNotNone(routed)
        self.assertEqual("success", routed.get("test", None))
class WAMPTicker(ApplicationSession):
    """
    WAMP application: subscribes to the 'ticker' push API and stores the
    pushed data in a MongoDB collection.
    """

    @inlineCallbacks
    def onJoin(self, details):
        """Reset the ticker collection, seed it from the API, then subscribe."""
        # open/create the poloniex database and its ticker collection
        ticker_collection = MongoClient().poloniex['ticker']
        self.db = ticker_collection
        self.db.drop()
        snapshot = Poloniex().returnTicker()
        for market in snapshot:
            entry = snapshot[market]
            # the market name doubles as the document id
            entry['_id'] = market
            self.db.insert_one(entry)
        yield self.subscribe(self.onTick, 'ticker')
        print('Subscribed to Ticker')

    def onTick(self, *data):
        """Apply one pushed tick; data[0] is the document id, data[1..9] the values."""
        field_names = (
            'last',
            'lowestAsk',
            'highestBid',
            'percentChange',
            'baseVolume',
            'quoteVolume',
            'isFrozen',
            'high24hr',
            'low24hr',
        )
        selector = {"_id": data[0]}
        changes = {name: data[position] for position, name in enumerate(field_names, 1)}
        self.db.update_one(selector, {"$set": changes})

    def onDisconnect(self):
        """Stop the reactor, if it is running, once the session disconnects."""
        if not reactor.running:
            return
        reactor.stop()
class MongoStore(Store):
    """Store backed by a single MongoDB collection."""

    def __init__(self, db, collection, url='mongodb://localhost'):
        """Bind to collection ``collection`` of database ``db`` on the server at ``url``."""
        client = MongoClient(url)
        self.collection = client[db][collection]

    def fetch(self, oid):
        """Return the document whose ``_id`` is ``oid`` (result of ``find_one``)."""
        selector = {'_id': oid}
        return self.collection.find_one(selector)

    def fetch_all(self):
        """Return a cursor over every document in the collection."""
        return self.collection.find()

    def iter_ids(self):
        """Yield the ``_id`` of every document, fetching only that field."""
        projection = {'_id': True}
        for document in self.collection.find({}, projection):
            yield document['_id']

    def save(self, obj):
        """Persist ``obj`` through the collection's ``save``."""
        self.collection.save(obj)

    def save_many(self, obj_iter):
        """Persist several objects through the collection's ``insert``."""
        self.collection.insert(obj_iter)

    def flush(self):
        """Drop the whole collection."""
        self.collection.drop()

    def delete(self, oid):
        """Delete the document whose ``_id`` is ``oid``."""
        selector = {'_id': oid}
        self.collection.delete_one(selector)
def parent():
    """
    Fork two workers and poll MongoDB until both have reported completion.

    Two marker documents (_id 1 and 2) are written before forking. Each child
    runs its work function and then removes its own marker; the parent polls
    the collection every SLEEP seconds until it is empty.
    """
    SLEEP = 10
    p = MongoClient().client["MP"].p
    p.drop()
    for marker_id in (1, 2):
        p.insert_one({"_id": marker_id})

    isParent = True
    newpid1 = os.fork()
    if newpid1 == 0:
        # first child: do the work, then clear marker 1 via a fresh client
        isParent = False
        child1()
        p = MongoClient().client["MP"].p
        p.remove({"_id": 1})
    else:
        # still the parent: start the second worker
        newpid2 = os.fork()
        if newpid2 == 0:
            # second child: do the work, then clear marker 2 via a fresh client
            isParent = False
            child2()
            p = MongoClient().client["MP"].p
            p.remove({"_id": 2})

    if not isParent:
        print("PROCESS FINISHED")
        return

    wait = True
    while wait:
        ps = p.find({})
        wait = ps.count() != 0
        if wait:
            print("MAIN PROCESS WAITING: %i" % ps.count())
            time.sleep(SLEEP)
    print("MAIN PROCESS FINISHED")
class TestBucketIntegration(unittest.TestCase):
    """Integration tests for ``bucket.Bucket`` checked against the raw Mongo collection."""

    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        self.bucket = bucket.Bucket(self.db, BUCKET)
        # direct handle on the same collection, used for fixtures and verification
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        """Save three weekly records, deliberately not in chronological order."""
        last_record = {
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
        }
        self.mongo_collection.save(last_record)
        first_record = {
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
        }
        self.mongo_collection.save(first_record)
        second_record = {
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
        }
        self.mongo_collection.save(second_record)

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        self.bucket.store(Record({'foo': 'bar'}))

        stored = list(self.mongo_collection.find())
        expected = only_contains(has_entries({"foo": "bar"}))
        assert_that(stored, expected)

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        my_records = [
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'}),
        ]
        self.bucket.store(my_records)

        stored = list(self.mongo_collection.find())
        expected = only_contains(
            has_entries({'name': 'Groucho'}),
            has_entries({'name': 'Harpo'}),
            has_entries({'name': 'Chico'}),
        )
        assert_that(stored, expected)

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()

        weekly_query = Query.create(period="week")
        result = weekly_query.execute(self.bucket.repository)

        in_week_order = contains(
            has_entry('_start_at', d_tz(2012, 12, 31)),
            has_entry('_start_at', d_tz(2013, 1, 28)),
            has_entry('_start_at', d_tz(2013, 2, 25)),
        )
        assert_that(result.data(), in_week_order)
class TestBucketIntegration(unittest.TestCase):
    """Checks that ``bucket.Bucket`` writes to, and queries from, the Mongo collection."""

    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        self.bucket = bucket.Bucket(self.db, BUCKET)
        # second, direct connection used to seed and inspect the collection
        raw_client = MongoClient(HOST, PORT)
        self.mongo_collection = raw_client[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        """Seed three records whose week starts are out of insertion order."""
        save = self.mongo_collection.save
        save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
        })
        save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
        })
        save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        my_record = Record({'foo': 'bar'})
        self.bucket.store(my_record)

        documents = list(self.mongo_collection.find())
        assert_that(documents, only_contains(has_entries({"foo": "bar"})))

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        self.bucket.store([
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'}),
        ])

        documents = list(self.mongo_collection.find())
        assert_that(
            documents,
            only_contains(
                has_entries({'name': 'Groucho'}),
                has_entries({'name': 'Harpo'}),
                has_entries({'name': 'Chico'}),
            ),
        )

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()

        result = Query.create(period="week").execute(self.bucket.repository)

        assert_that(
            result.data(),
            contains(
                has_entry('_start_at', d_tz(2012, 12, 31)),
                has_entry('_start_at', d_tz(2013, 1, 28)),
                has_entry('_start_at', d_tz(2013, 2, 25)),
            ),
        )
class RepositoryIntegrationTest(unittest.TestCase):
    """Base for repository integration tests; builds a Repository over a fresh collection."""
    __metaclass__ = ABCMeta

    def setUp(self):
        """Create the repository and a separate raw collection handle, then empty the collection."""
        driver_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.repo = Repository(MongoDriver(driver_collection))
        # independent client used by tests to inspect the collection directly
        raw_client = MongoClient(HOST, PORT)
        self.mongo_collection = raw_client[DB_NAME][BUCKET]
        self.mongo_collection.drop()
class PluginXmlManager():
    """Stores plugin and system XML documents in the ``beat`` MongoDB database.

    XML is converted with ``xmltodict`` before storage, so plugin documents
    are addressed by paths such as ``Plugin.Plugin_name.#text``.
    """

    def __init__(self):
        # One client is enough for both collections of the same database.
        beat_db = MongoClient('localhost', 27017).beat
        self.pluginPath = beat_db.Plugin
        self.systemPath = beat_db.System

    def uploadSystemOnSave(self, xml):
        """Parse ``xml`` and insert it into the System collection."""
        self.systemPath.insert_one(xmltodict.parse(xml))

    def deleteSystem(self):
        """Drop the whole System collection."""
        self.systemPath.drop()

    def getListOfPlugins(self):
        """Return the names of all stored plugins."""
        return [item['Plugin']['Plugin_name']['#text']
                for item in self.pluginPath.find()]

    def deleteSelectedPlugin(self, nameofplugin):
        """Delete one plugin document whose name is ``nameofplugin``."""
        self.pluginPath.delete_one({"Plugin.Plugin_name.#text": nameofplugin})

    def getSelectedPlugin(self, plugin_name):
        """Return the plugin document named ``plugin_name``, or None if absent.

        The original fell through into a loop over undefined project
        variables (``projectsList``/``project_name``) and raised NameError
        whenever the plugin was not found; that loop has been removed.
        """
        for item in self.pluginPath.find():
            if item['Plugin']['Plugin_name']['#text'] == plugin_name:
                return item
        return None

    def uploadPlugin(self, xml):
        """Parse ``xml`` and insert it into the Plugin collection."""
        self.pluginPath.insert_one(xmltodict.parse(xml))

    def pluginExists(self, new_plugin_name):
        """Return True if a plugin named ``new_plugin_name`` is stored."""
        return any(item['Plugin']['Plugin_name']['#text'] == new_plugin_name
                   for item in self.pluginPath.find())

    def updatePluginDescription(self, old_description, new_description):
        """Replace the description of one plugin matched by its old description."""
        myquery = {"Plugin.Plugin_Desc.#text": old_description}
        new_values = {"$set": {"Plugin.Plugin_Desc.#text": new_description}}
        self.pluginPath.update_one(myquery, new_values)

    def xmlMerger(self, holder, xml, xml2):
        """Append the root of ``xml2`` to every ``holder`` element of ``xml``.

        :param holder: ElementTree path of the element(s) under which to place xml2
        :param xml: source accepted by ``ET.parse`` for the receiving document
        :param xml2: source accepted by ``ET.parse`` for the document to embed
        :return: the parsed tree of ``xml`` with the additions applied
        """
        tree1 = ET.parse(xml)
        root2 = ET.parse(xml2).getroot()
        for element1 in tree1.findall(holder):
            element1.append(root2)
        return tree1
def setUp(self):
    """Reset ``test.product``, seed it with two rows, and build the Db2Csv under test."""
    product = MongoClient()['test'].product
    product.drop()
    seed_rows = [
        {'name': 'kami', 'age': 11},
        {'name': 'kamisama', 'age': 12},
    ]
    product.insert(seed_rows)
    self.test_target = Db2Csv(product)
def main(): import sys from time import time t0 = time() if len(sys.argv) < (len(CLI_ARGS)+1): print "Usage:", sys.argv[0], " ".join(CLI_ARGS), " ".join(OPT_ARGS) exit() # Run that immediately so that we crash on the stop if we cannot connect to the DB anyway from pymongo import MongoClient mdb_host = sys.argv[3].strip() mdb = MongoClient(host=mdb_host).users.clicks_per_query input_path = sys.argv[1].strip() log_filepath = sys.argv[2].strip() allowed_user_ids = set() with univ_open(input_path, 'r') as f: for line in f: allowed_user_ids.add(int(line.strip())) print "Loaded", len(allowed_user_ids), "allowed user ids." lp = LogProcessor(log_filepath) t0 = time() lp.process(allowed_user_ids=allowed_user_ids) print "Processed in", time() - t0 print "Dropping previous DB" mdb.drop() print "Dumping everything into MongoDB" t0 = time() batch_size = 200000 for i in xrange(batch_size, len(lp.user_clicks_number), batch_size): start = i-batch_size end = i print "Batch", start, end # Not: here we only keep the elements of the list that are not None # as the ones that are None are elements filtered out by the allowed_users_ids mdb.insert([_ for _ in lp.user_clicks_number[start:end] if _ is not None]) # len(lp.user_clicks_number) was not a multiple of batch_size, let us execute the last batch: if i is not len(lp.user_clicks_number)-1: print "Last batch..." mdb.insert(lp.user_clicks_number[i:]) print "Done in", time()-t0 print "Terminating script."
def upload(source_json, source, db_name=DB_NAME, coll_name=VERBS, drop=False, indices=(VERB, PARADIGM)):
    """Load JSON-lines entries into a MongoDB collection, tagging each with its source.

    :param source_json: input passed to ``read_json_lines``
    :param source: value stored under the ``SOURCE`` key of every entry
    :param db_name: target database name
    :param coll_name: target collection name
    :param drop: when true, drop the collection before loading
    :param indices: passed to ``add_indices`` after loading
    """
    collection = MongoClient(LOCALHOST, PORT)[db_name][coll_name]
    if drop:
        collection.drop()
    print('Initially,', collection.count(), 'entries')

    progress = counter()
    for entry in read_json_lines(source_json):
        next(progress)
        entry[SOURCE] = source
        collection.insert(entry)

    add_indices(collection, indices)
    print('\nCurrently,', collection.count(), 'entries')
class WAMPTicker(ApplicationSession):
    """
    WAMP application: subscribes to the 'ticker' push API and stores the
    pushed data in a MongoDB collection.
    """

    @inlineCallbacks
    def onJoin(self, details):
        """Reset and seed the ticker collection, then subscribe to 'ticker'."""
        # open/create the poloniex database and its ticker collection
        ticker_collection = MongoClient().poloniex['ticker']
        self.db = ticker_collection
        self.db.drop()
        self.populateTicker()
        yield self.subscribe(self.onTick, 'ticker')
        logger.info('Subscribed to Ticker')

    def populateTicker(self):
        """Upsert one document per market from ``self.api.returnTicker()``."""
        snapshot = self.api.returnTicker()
        for market in snapshot:
            fields = snapshot[market]
            # the market name doubles as the document id
            fields['_id'] = market
            self.db.update_one({'_id': market}, {'$set': fields}, upsert=True)
        logger.info('Populated markets database with ticker data')

    def onTick(self, *data):
        """Upsert one pushed tick; data[0] is the document id, data[1..9] the values."""
        logger.debug(data)
        field_names = (
            'last',
            'lowestAsk',
            'highestBid',
            'percentChange',
            'baseVolume',
            'quoteVolume',
            'isFrozen',
            'high24hr',
            'low24hr',
        )
        selector = {"_id": data[0]}
        changes = {name: data[position] for position, name in enumerate(field_names, 1)}
        self.db.update_one(selector, {"$set": changes}, upsert=True)

    def onDisconnect(self):
        """Stop the reactor, if it is running, once the session disconnects."""
        if not reactor.running:
            return
        reactor.stop()
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """

    def __init__(self, **kwargs):
        """Build the parser from ``parsecls`` and bind the ``docs.<corpus_id>`` collection.

        ``corpus_id`` and ``parsecls`` are popped from kwargs; whatever remains
        is passed to the ``parsecls`` constructor.
        """
        self.corpus_id = kwargs.pop('corpus_id')
        self.parse = kwargs.pop('parsecls')(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    def __call__(self):
        """Drop the existing corpus collection and insert every parsed document."""
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()

        log.info('Loading documents for corpus %s...', self.corpus_id)
        # Tracked separately so that an empty corpus reports 0 instead of
        # failing on an unbound loop variable.
        imported = 0
        for i, doc in enumerate(self.parse()):
            if i % 250 == 0:
                log.debug('Processed %i documents...', i)
            self.store.insert(doc.json())
            imported = i + 1

        log.info('Import completed for %i documents.', imported)

    APPS = set()

    @classmethod
    def Register(cls, c):
        """Register ``c`` as a sub-command and return it (usable as a decorator)."""
        cls.APPS.add(c)
        return c

    @classmethod
    def add_arguments(cls, p):
        """Add the corpus argument and one sub-parser per registered class to ``p``.

        Sub-command names are the lower-cased class names with 'prepare' removed.
        """
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)

        app_name = 'prepare'
        sp = p.add_subparsers()
        for c in cls.APPS:
            name = c.__name__.lower().replace(app_name, '')
            # A registered class without a docstring must not break the CLI.
            doc = c.__doc__ or ''
            csp = sp.add_parser(
                name,
                help=doc.split('\n')[0],
                description=textwrap.dedent(doc.rstrip()),
                formatter_class=argparse.RawDescriptionHelpFormatter)
            c.add_arguments(csp)
        return p
class MongoRepository:
    """Thin wrapper over one collection of the ``imsearch`` database."""

    def __init__(self, index_name):
        """Connect using the ``MONGO_URI`` environment variable and bind collection ``index_name``."""
        mongo_uri = os.environ.get('MONGO_URI')
        client = MongoClient(mongo_uri)
        self.db = client.imsearch[index_name]

    def clean(self):
        """Drop the bound collection."""
        self.db.drop()

    def insert_one(self, data):
        """Insert a single document and return the driver's result."""
        result = self.db.insert_one(data)
        return result

    def insert_many(self, data):
        """Insert several documents and return the driver's result."""
        result = self.db.insert_many(data)
        return result

    def find_one(self, query):
        """Return the result of ``find_one`` for ``query``."""
        return self.db.find_one(query)

    def find(self, query):
        """Return all documents matching ``query`` as a list."""
        cursor = self.db.find(query)
        return list(cursor)
def drop(client, db, cell):  # cell
    '''Delete a cell.

    If a cell did not exist, it is dropped nevertheless. Ask your favourite
    philosopher what it means to delete something that did not exist.

    Example:

    \b
    $ zoo drop --db zika --cell survey
    Are you sure you want to drop the db? [y/N]: y
    Dropped cell "animals" from database "zika".

    \b
    $ zoo drop --db zika --cell survey --force  # no confirmation
    Dropped cell "animals" from database "zika".
    '''
    # NOTE(review): the docstring text is left untouched because it may be
    # shown verbatim as command help (it carries \b markers) - confirm.
    collection = MongoClient(client)[db][cell]
    collection.drop()
    quoted_cell = '"' + cell + '"'
    quoted_db = '"' + db + '".'
    print('Dropped cell', quoted_cell, 'from database', quoted_db)
def main(): excel = raw_input("Enter excel file to load: ") collection = raw_input("Enter collection (if empty staff collection will be populated): ") # collection = ['staff', collection][collection == collection] collection = collection if collection else 'staff' client = MongoClient("localhost", 27017)['hppi'][collection] rd = xlrd.open_workbook(excel) sheet = rd.sheet_by_index(0) header = sheet.row_values(0) # column headers if fields(set(header), collection) == 0: # client.drop() for rownum in range(1, sheet.nrows): row = sheet.row_values(rownum) # row values data = {} for el in range(len(header)): if sheet.cell(rownum, el).ctype == 3: # i.e. datetype dt = datetime.datetime(*xlrd.xldate_as_tuple(row[el], rd.datemode)) # convert date from excel else: dt = row[el] data[header[el]] = dt # pack data in a dictionary data[name] = value if collection == 'staff': data["access"] = 1 # data access right by default (can edit only themselves) data["pubsnum"] = 0 # num of pubs data["prnd"] = 0 # for prnd data["prnd_data"] = {} data["publist"] = {} data["graduated_year"] = int(data["graduated_year"]) if not re.match(r"^\S+@\S+\.\S+$", data["email"]): print "No correct email for someone with the surname", data["surname"], "skipping" else: user = client.find_one({"email": data["email"]}) if not user: client.insert_one(data) # filling mongodb's collection named "staff" with data else: print "Staff member already present in DB, entry is updated" client.update_one({'email': data["email"]}, {'$set': data}) else: client.drop() client.insert_one(data) else: print "Column names do not correspond to the specification!"
class MongoStore(Store):
    """Store backed by a single MongoDB collection."""

    def __init__(self, db, collection, url='mongodb://localhost'):
        """Bind to collection ``collection`` of database ``db`` on the server at ``url``."""
        self.collection = MongoClient(url)[db][collection]

    def fetch(self, oid):
        """Return the document whose ``_id`` is ``oid`` (result of ``find_one``)."""
        return self.collection.find_one({'_id': oid})

    def fetch_all(self):
        """Return a cursor over every document in the collection."""
        return self.collection.find()

    def save(self, obj):
        """Persist ``obj`` through the collection's ``save``."""
        self.collection.save(obj)

    def save_many(self, obj_iter):
        """Persist several objects through the collection's ``insert``.

        The first parameter was misspelled ``seld``, so the body's ``self``
        was undefined and every call raised NameError.
        """
        self.collection.insert(obj_iter)

    def flush(self):
        """Drop the whole collection."""
        self.collection.drop()

    def delete(self, oid):
        """Delete only the document whose ``_id`` is ``oid``.

        Previously this called ``collection.drop(...)``, which targets the
        whole collection rather than a single document.

        :return: the driver's result of ``delete_one``
        """
        return self.collection.delete_one({'_id': oid})
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """

    def __init__(self, **kwargs):
        """Pop ``corpus_id`` and ``parsecls``; the remaining kwargs configure the parser."""
        corpus_id = kwargs.pop('corpus_id')
        parser_cls = kwargs.pop('parsecls')
        self.corpus_id = corpus_id
        self.parse = parser_cls(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    def __call__(self):
        """Replace the stored corpus with the documents produced by the parser."""
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()

        log.info('Loading documents for corpus %s...', self.corpus_id)
        for position, document in enumerate(self.parse()):
            # progress message every 100 documents
            if position % 100 == 0:
                log.debug('Processed %i documents...', position)
            self.store.insert(document.json())

        log.info('Done.')

    APPS = set()

    @classmethod
    def Register(cls, c):
        """Add ``c`` to the registered sub-commands and hand it back."""
        cls.APPS.add(c)
        return c

    @classmethod
    def add_arguments(cls, p):
        """Attach the corpus argument and a sub-parser per registered class to ``p``."""
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)

        app_name = 'prepare'
        subparsers = p.add_subparsers()
        for app in cls.APPS:
            # sub-command name: lower-cased class name without 'prepare'
            command = app.__name__.lower()
            command = command.replace(app_name, '')
            summary = app.__doc__.split('\n')[0]
            description = textwrap.dedent(app.__doc__.rstrip())
            app_parser = subparsers.add_parser(
                command,
                help=summary,
                description=description,
                formatter_class=argparse.RawDescriptionHelpFormatter)
            app.add_arguments(app_parser)
        return p
class TickPolo(poloniex.Poloniex):
    """Poloniex client that keeps ticker data in the ``poloniex.ticker`` collection."""

    def __init__(self, *args, **kwargs):
        """Reset the ticker collection and seed it from ``returnTicker()``."""
        super(TickPolo, self).__init__(*args, **kwargs)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()

        tick = self.returnTicker()
        # market name -> integer id
        self._ids = {market: int(tick[market]['id']) for market in tick}
        for market in tick:
            values = {item: float(tick[market][item]) for item in tick[market]}
            self.db.update_one({'_id': market}, {'$set': values}, upsert=True)

    def ticker(self, market=None):
        '''returns ticker data saved from websocket '''
        if not (self._t and self._running):
            self.logger.error('Websocket is not running!')
            return self.returnTicker()
        if not market:
            return list(self.db.find())
        return self.db.find_one({'_id': market})

    def on_ticker(self, data):
        """Upsert one tick; data[0] is the market id, data[1..9] the values."""
        data = [float(dat) for dat in data]
        selector = {"id": int(data[0])}
        changes = {
            'last': data[1],
            'lowestAsk': data[2],
            'highestBid': data[3],
            'percentChange': data[4],
            'baseVolume': data[5],
            'quoteVolume': data[6],
            'isFrozen': int(data[7]),
            'high24hr': data[8],
            'low24hr': data[9],
        }
        self.db.update_one(selector, {"$set": changes}, upsert=True)
class MongoStore(Store):
    """Store backed by a single MongoDB collection."""

    def __init__(self, db, collection, uri='mongodb://localhost'):
        """Bind to collection ``collection`` of database ``db`` on the server at ``uri``."""
        client = MongoClient(uri)
        self.collection = client[db][collection]

    @classmethod
    def get_protocol(cls):
        """Return the protocol name handled by this store."""
        return 'mongodb'

    def iter_ids(self):
        """Yield the ``_id`` of every document, fetching only that field."""
        projection = {'_id': True}
        for document in self.collection.find({}, projection):
            yield document['_id']

    def flush(self):
        """Drop the whole collection."""
        self.collection.drop()

    def exists(self, oid):
        """Return True when exactly one document has ``_id`` equal to ``oid``."""
        matches = self.collection.find({'_id': oid})
        return matches.count() == 1

    def delete(self, oid):
        """Delete the document whose ``_id`` is ``oid``."""
        selector = {'_id': oid}
        self.collection.delete_one(selector)

    def fetch(self, oid):
        """Return the document whose ``_id`` is ``oid`` (result of ``find_one``)."""
        selector = {'_id': oid}
        return self.collection.find_one(selector)

    def fetch_all(self):
        """Return a cursor over every document in the collection."""
        return self.collection.find()

    def save(self, obj):
        """Persist ``obj`` through the collection's ``save``."""
        self.collection.save(obj)

    def save_many(self, obj_iter):
        """Persist several objects through the collection's ``insert``."""
        self.collection.insert(obj_iter)

    @classmethod
    def Get(cls, store_id, uri='mongodb://localhost', **kwargs):
        """Build a store from a ``"<db>:<collection>"`` id; extra kwargs are ignored."""
        db_name, collection_name = store_id.split(':')
        return cls(db_name, collection_name, uri)
def parent():
    """
    Spawn two forked workers and wait, by polling MongoDB, until both are done.

    Marker documents with _id 1 and 2 are created up front. Each child runs
    its work function and then deletes its own marker through a fresh client;
    the parent checks every SLEEP seconds whether any marker is left.
    """
    SLEEP = 10
    p = MongoClient().client["MP"].p
    p.drop()
    p.insert_one({"_id": 1})
    p.insert_one({"_id": 2})

    isParent = True
    newpid1 = os.fork()
    if newpid1 != 0:
        # We are the parent: fork the second worker
        newpid2 = os.fork()
        if newpid2 == 0:
            # We are the second child
            isParent = False
            child2()
            p = MongoClient().client["MP"].p
            p.remove({"_id": 2})
    else:
        # We are the first child
        isParent = False
        child1()
        p = MongoClient().client["MP"].p
        p.remove({"_id": 1})

    if isParent:
        while True:
            ps = p.find({})
            if ps.count() == 0:
                break
            print("MAIN PROCESS WAITING: %i" % ps.count())
            time.sleep(SLEEP)
        print("MAIN PROCESS FINISHED")
    else:
        print("PROCESS FINISHED")
'consumer_key': 'pOC3SwyaleFnF7HNiQ4VofXDz', 'consumer_secret': '9wnavUJlt1wjiPJxhtgElNO5CEsMxKS8M7GgsH7005yJuzsvu3', }, 'topics': ['mongodb', 'nsa', 'google'] } t = Tweets(account_details) t.connect() """ gevent.spawn(tweets, t).join() gevent.joinall([ gevent.spawn(user_stream, t.queue, t.api), gevent.spawn(topic_stream, t.queue, t.api, 'twitter') ]) """ for tw in t.timeline(): #doc = json.loads(tw, default=json_util.default) col.insert(tw) """ for tw in t.user_tweets(): json.dumps(tw, indent=4 * ' ') """ col = MongoClient()['firstapp2']['messages'] col.drop() mail(col) #tweets(col)
class TestBucketIntegration(unittest.TestCase):
    """Integration tests for ``bucket.Bucket`` checked against the raw Mongo collection."""

    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        config = BucketConfig(BUCKET, data_group="group", data_type="type",
                              max_age_expected=1000)
        self.bucket = bucket.Bucket(self.db, config)
        # direct handle on the same collection, used for fixtures and verification
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        """Save three weekly records, deliberately not in chronological order."""
        save = self.mongo_collection.save
        save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
            "_updated_at": d_tz(2013, 8, 10),
        })
        save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
            "_updated_at": d_tz(2013, 9, 10),
        })
        save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
            "_updated_at": d_tz(2013, 10, 10),
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        self.bucket.store(Record({'foo': 'bar'}))

        stored = list(self.mongo_collection.find())
        assert_that(stored, only_contains(has_entries({"foo": "bar"})))

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        my_records = [
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'}),
        ]
        self.bucket.store(my_records)

        stored = list(self.mongo_collection.find())
        expected = only_contains(
            has_entries({'name': 'Groucho'}),
            has_entries({'name': 'Harpo'}),
            has_entries({'name': 'Chico'}),
        )
        assert_that(stored, expected)

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()

        weekly_query = Query.create(period=WEEK)
        result = weekly_query.execute(self.bucket.repository)

        in_week_order = contains(
            has_entry('_start_at', d_tz(2012, 12, 31)),
            has_entry('_start_at', d_tz(2013, 1, 28)),
            has_entry('_start_at', d_tz(2013, 2, 25)),
        )
        assert_that(result.data(), in_week_order)

    def test_bucket_returns_last_updated(self):
        self.setup__timestamp_data()

        newest = self.bucket.get_last_updated()
        assert_that(newest, equal_to(d_tz(2013, 10, 10)))

    def test_bucket_returns_none_if_there_is_no_last_updated(self):
        assert_that(self.bucket.get_last_updated(), is_(None))

    def test_bucket_is_recent_enough(self):
        # updated 500 seconds ago, inside max_age_expected=1000
        age = datetime.timedelta(seconds=500)
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - age,
        })
        assert_that(self.bucket.is_recent_enough())

    def test_bucket_is_not_recent_enough(self):
        # updated 50000 seconds ago, outside max_age_expected=1000
        age = datetime.timedelta(seconds=50000)
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - age,
        })
        assert_that(not self.bucket.is_recent_enough())
class wsTicker(object):
    """Keeps the ``poloniex.ticker`` collection in sync with the websocket ticker feed."""

    def __init__(self, api=None):
        """Reset the ticker collection and prepare (but do not start) the websocket app."""
        self.api = api if api else Poloniex(jsonNums=float)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        self.ws = websocket.WebSocketApp(
            "wss://api2.poloniex.com/",
            on_message=self.on_message,
            on_error=self.on_error,
            on_close=self.on_close)
        self.ws.on_open = self.on_open

    def __call__(self, market=None):
        """ returns ticker from mongodb """
        if not market:
            return list(self.db.find())
        return self.db.find_one({'_id': market})

    def on_message(self, ws, message):
        """Handle one websocket frame: errors, (un)subscribe acks, or a tick update."""
        payload = json.loads(message)
        if 'error' in payload:
            print(payload['error'])
            return

        # only channel 1002 (the ticker channel subscribed in on_open) is handled
        if payload[0] != 1002:
            return

        status = payload[1]
        if status == 1:
            print('Subscribed to ticker')
            return
        if status == 0:
            print('Unsubscribed to ticker')
            return

        data = payload[2]
        selector = {"id": float(data[0])}
        changes = {
            'last': data[1],
            'lowestAsk': data[2],
            'highestBid': data[3],
            'percentChange': data[4],
            'baseVolume': data[5],
            'quoteVolume': data[6],
            'isFrozen': data[7],
            'high24hr': data[8],
            'low24hr': data[9],
        }
        self.db.update_one(selector, {"$set": changes}, upsert=True)

    def on_error(self, ws, error):
        """Print the websocket error."""
        print(error)

    def on_close(self, ws):
        """Report that the websocket has closed."""
        print("Websocket closed!")

    def on_open(self, ws):
        """Seed the collection from the REST ticker, then subscribe to channel 1002."""
        snapshot = self.api.returnTicker()
        for market in snapshot:
            self.db.update_one(
                {'_id': market}, {'$set': snapshot[market]}, upsert=True)
        print('Populated markets database with ticker data')
        subscribe_command = {'command': 'subscribe', 'channel': 1002}
        self.ws.send(json.dumps(subscribe_command))

    def start(self):
        """Run the websocket loop in a daemon thread."""
        worker = Thread(target=self.ws.run_forever)
        worker.daemon = True
        self.t = worker
        self.t.start()
        print('Thread started')

    def stop(self):
        """Close the websocket and wait for the worker thread to finish."""
        self.ws.close()
        self.t.join()
        print('Thread joined')
class MongodbUtil(object):
    """
    Convenience wrapper around a single MongoDB collection.

    - MYSQL_PASSWD must be set in .bashrc or .bashprofile.
    """

    def __init__(self, mongo_url, db_name, collection_name, auto_connect=False):
        """
        :param mongo_url: host, port, username, password, auth db
        :param db_name: database name
        :param collection_name: collection name
        :param auto_connect: default do not connect for multiprocessing (http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing)
        """
        self.mongo_url = mongo_url
        self.db_name = db_name
        self.collection_name = collection_name
        self.auto_connect = auto_connect
        self.collection = MongoClient(mongo_url, socketKeepAlive=True, connect=auto_connect)[db_name][collection_name]

    def __repr__(self):
        # The URL is passed through mask_passwd_in_url before being shown.
        return '%s (db_name:%s, collection_name:%s, auto_connect:%s)' % (
            StringUtil.mask_passwd_in_url(self.mongo_url), self.db_name, self.collection_name, self.auto_connect)

    def __str__(self):
        return self.__repr__()

    def find(self, query=None, sort=None, limit=0):
        """Yield documents matching ``query``.

        :param query: filter document; defaults to ``{}`` (everything)
        :param sort: list of (field, direction) pairs; defaults to ascending ``_id``
        :param limit: maximum number of documents; 0 means no limit
        """
        if query is None:
            query = {}
        if sort is None:
            sort = [('_id', ASCENDING)]

        cursor = self.collection.find(query, no_cursor_timeout=True).sort(sort).limit(limit)
        try:
            for row in cursor:
                yield row
        finally:
            # A no-timeout cursor is not reaped by the server, so close it
            # explicitly even when the caller stops iterating early.
            cursor.close()

    def count(self, query=None):
        """Return the number of documents matching ``query`` (all when None)."""
        if query is None:
            query = {}
        # `no_cursor_timeout` is a cursor option, not a count option, so it is
        # no longer forwarded here.
        return self.collection.count(query)

    def find_one(self, query: dict, limit=0) -> dict:
        """Return one document matching ``query``, or None when nothing matches.

        :param limit: accepted for backward compatibility and ignored. The
            previous code called ``.limit()`` on the returned document, which
            raised AttributeError on every call.
        """
        return self.collection.find_one(query, no_cursor_timeout=True)

    def create_index(self, field_list=None, unique=False):
        """Create one ascending background index per field in ``field_list``."""
        if field_list is None:
            field_list = []
        for field in field_list:
            self.collection.create_index([(field, ASCENDING)], background=True, unique=unique)
        return

    def insert(self, row: dict):
        """Insert one document."""
        return self.collection.insert_one(row)

    def update_one(self, where_query: dict, update_content: dict, upsert=False):
        """Update the first document matching ``where_query``."""
        return self.collection.update_one(
            where_query,
            update_content,
            upsert=upsert
        )

    def update(self, where_query: dict, update_content: dict, upsert=False):
        """Update every document matching ``where_query``."""
        return self.collection.update_many(
            where_query,
            update_content,
            upsert=upsert
        )

    def save(self, row):
        """Persist ``row`` through the collection's ``save``."""
        return self.collection.save(row)

    def delete(self, where_query: dict):
        """Delete one matching document; return the deleted count (0 if no result)."""
        result = self.collection.delete_one(where_query)
        if result:
            return result.deleted_count
        return 0

    def drop(self):
        """Drop the whole collection."""
        return self.collection.drop()
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """

    def __init__(self, **kwargs):
        """Build the parser from ``parsecls`` and bind the ``docs.<corpus_id>`` collection.

        ``corpus_id`` and ``parsecls`` are popped from kwargs; whatever remains
        is passed to the ``parsecls`` constructor.
        """
        self.corpus_id = kwargs.pop('corpus_id')
        self.parse = kwargs.pop('parsecls')(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator/denominator as a float, or 0.0 for a zero denominator."""
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    def __call__(self):
        """Replace the stored corpus with the parsed documents and log mention statistics.

        An empty corpus (no documents, chains or mentions) is reported with
        zero-valued statistics instead of raising.
        """
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()

        # mention statistics
        total_non_nil = 0
        total_candidate_recalled = 0
        chain_mention_counts = []
        chain_candidate_counts = []
        imported = 0  # stays 0 when the parser yields nothing

        log.info('Loading documents for corpus %s...', self.corpus_id)
        for i, doc in enumerate(self.parse()):
            if i % 100 == 0:
                log.debug('Processed %i documents...', i)

            # accumulate mention statistics
            for chain in doc.chains:
                num_mentions = len(chain.mentions)
                chain_mention_counts.append(num_mentions)
                chain_candidate_counts.append(len(chain.candidates))
                if chain.resolution is not None:
                    total_non_nil += num_mentions
                    if chain.resolution.id in [c.id for c in chain.candidates]:
                        total_candidate_recalled += num_mentions
                    else:
                        log.warning('No candidate for chain: %s - %s',
                                    chain.resolution.id,
                                    ', '.join(set("'" + m.text.lower() + "'" for m in chain.mentions)))

            self.store.insert(doc.json())
            imported = i + 1

        total_chains = len(chain_mention_counts)
        total_mentions = sum(chain_mention_counts)
        total_nil = total_mentions - total_non_nil

        # numpy.mean/std of an empty list give nan plus a warning; report zeros instead.
        if total_chains:
            mention_mean = numpy.mean(chain_mention_counts)
            mention_std = numpy.std(chain_mention_counts)
            candidate_mean = numpy.mean(chain_candidate_counts)
            candidate_std = numpy.std(chain_candidate_counts)
        else:
            mention_mean = mention_std = candidate_mean = candidate_std = 0.0

        section_delimiter = '-' * 40
        log.info(section_delimiter)
        log.info('CORPUS STATISTICS')
        log.info(section_delimiter)
        log.info('Total mentions = %i', total_mentions)
        log.info('Total nil mentions (%%) = %i (%.2f)', total_nil,
                 self._ratio(total_nil, total_mentions))
        log.info(section_delimiter)
        log.info('Total chains = %i', total_chains)
        log.info('Mentions per Chain (σ) = %.1f (%.2f)', mention_mean, mention_std)
        log.info(section_delimiter)
        log.info('Candidates per Chain (σ) = %.1f (%.2f)', candidate_mean, candidate_std)
        no_candidates_count = sum(1 for c in chain_candidate_counts if c == 0)
        candidate_recall = 'n/a' if total_non_nil == 0 else '%.2f' % (float(total_candidate_recalled) / total_non_nil)
        log.info('Candidate Recall (%%) = %s', candidate_recall)
        log.info('Nil Candidate Chains (%%) = %i (%.2f)', no_candidates_count,
                 self._ratio(no_candidates_count, total_chains))
        log.info(section_delimiter)

        log.info('Import completed for %i documents.', imported)

    APPS = set()

    @classmethod
    def Register(cls, c):
        """Register ``c`` as a sub-command and return it (usable as a decorator)."""
        cls.APPS.add(c)
        return c

    @classmethod
    def add_arguments(cls, p):
        """Add the corpus argument and one sub-parser per registered class to ``p``.

        Sub-command names are the lower-cased class names with 'prepare' removed.
        """
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)

        app_name = 'prepare'
        sp = p.add_subparsers()
        for c in cls.APPS:
            name = c.__name__.lower().replace(app_name, '')
            # A registered class without a docstring must not break the CLI.
            doc = c.__doc__ or ''
            csp = sp.add_parser(
                name,
                help=doc.split('\n')[0],
                description=textwrap.dedent(doc.rstrip()),
                formatter_class=argparse.RawDescriptionHelpFormatter)
            c.add_arguments(csp)
        return p
def clean_collection():
    """Return the ``test.test`` collection after dropping its contents.

    The server URI comes from the ``MONGO_HOST`` environment variable and
    falls back to ``mongodb://localhost/test``.
    """
    mongo_uri = os.environ.get("MONGO_HOST", "mongodb://localhost/test")
    client = MongoClient(mongo_uri)
    collection = client.test.test
    collection.drop()
    return collection
from pymongo import MongoClient from textblob import TextBlob from newspaper import Article from newspaper import ArticleException dataForEntityAnalysis = MongoClient().precogTask.dataForAnalysis.find() tweetCollection = MongoClient().precogTask.tweetCollection polarityCollection = MongoClient().precogTask.polarityCollection polarityCollection.drop() for ne in dataForEntityAnalysis: #Finding avg tweet polarity wordData = {} wordData['word'] = ne['word'] tweetPolarityList = [] totalTweetPolarity = 0 print("analysing word " + ne['word']) for t in ne['tweetList']: print("analysing tweet " + str(t)) tweet = tweetCollection.find_one({'_id' : t}) tempPol = TextBlob(tweet['text']).sentiment.polarity tweetPolarityList.append(tempPol) totalTweetPolarity += tempPol avgTweetPolarity = totalTweetPolarity/len(ne['tweetList']) wordData['tweetPolarityList'] = tweetPolarityList #finding average article polarity totalArticlePolarity = 0 articlePolarityList = [] for articleURL in ne['articleLinksList']:
class TestMongoDriver(unittest.TestCase):
    # Integration tests for MongoDriver against the Mongo collection
    # DB_NAME/BUCKET at HOST:PORT. The collection is dropped before each test.

    def setUp(self):
        # One client is enough: the driver under test and the assertions
        # look at the same collection.
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_driver = MongoDriver(self.mongo_collection)
        self.mongo_collection.drop()

    def test_save(self):
        thing_to_save = {'name': 'test_document'}
        another_thing_to_save = {'name': '2nd_test_document'}

        self.mongo_driver.save(thing_to_save)
        self.mongo_driver.save(another_thing_to_save)

        # Materialise the cursor: a live cursor can only be iterated once,
        # so the second assertion would otherwise depend on result order.
        results = list(self.mongo_collection.find())
        assert_that(results, has_item(thing_to_save))
        assert_that(results, has_item(another_thing_to_save))

    def test_save_updates_document_with_id(self):
        a_document = {"_id": "event1", "title": "I'm an event"}
        updated_document = {"_id": "event1", "title": "I'm another event"}

        self.mongo_driver.save(a_document)
        self.mongo_driver.save(updated_document)

        saved_documents = list(self.mongo_collection.find())
        assert_that(saved_documents, only_contains(updated_document))

    def test_find(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "ascending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "George", "plays": "guitar"}),
            has_entries({"name": "John", "plays": "guitar"}),
        ))

    def test_find_sort_descending(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "descending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "John", "plays": "guitar"}),
            has_entries({"name": "George", "plays": "guitar"}),
        ))

    def test_find_with_limit(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": {"$ne": "guitar"}},
                                         sort=["name", "descending"],
                                         limit=1)

        assert_that(results, contains(
            has_entries({"name": "Ringo", "plays": "drums"})
        ))

    def test_group(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(2), "type": "wind"}),
            has_entries({"_count": is_(3), "type": "string"})
        ))

    def test_group_with_query(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={"range": "high"},
                                          collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(1), "type": "wind"}),
            has_entries({"_count": is_(2), "type": "string"})
        ))

    def test_group_and_collect_additional_properties(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=["range"])

        assert_that(results, contains(
            has_entries({"_count": is_(2), "type": "wind",
                         "range": ["high", "low"]}),
            has_entries({"_count": is_(3), "type": "string",
                         "range": ["high", "high", "low"]})
        ))

    def test_group_and_collect_with_hyphen_in_field_name(self):
        self._setup_hyphenated_documents()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=["this-name"])

        assert_that(results, contains(
            has_entries({"_count": is_(2), "type": "foo",
                         "this-name": ["bar", "bar"]}),
            has_entries({"_count": is_(2), "type": "bar",
                         "this-name": ["bar", "foo"]})
        ))

    def test_group_and_collect_with_injection_attempt(self):
        self._setup_hyphenated_documents()

        # Field names containing a quote and a bracket must not change the
        # grouping result.
        for collect_field in ["name']-foo", "name\\']-foo"]:
            results = self.mongo_driver.group(keys=["type"], query={},
                                              collect_fields=[collect_field])

            assert_that(results, contains(
                has_entries({"_count": is_(2), "type": "foo"}),
                has_entries({"_count": is_(2), "type": "bar"})
            ))

    def test_group_and_collect_with_false_value(self):
        self.mongo_collection.save({"foo": "one", "bar": False})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "one", "bar": False})

        results = self.mongo_driver.group(["foo"], {}, ["bar"])

        assert_that(results, contains(
            has_entries({"bar": [False, False]}),
            has_entries({"bar": [True, True]})
        ))

    def test_group_without_keys(self):
        self._setup_people()

        results = self.mongo_driver.group(keys=[], query={},
                                          collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(4)}),
        ))

    # this responsibility does not belong here
    def test_group_ignores_documents_without_grouping_keys(self):
        self._setup_people()
        self.mongo_collection.save({"name": "Yoko"})

        results = self.mongo_driver.group(keys=["plays"], query={},
                                          collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(2), "plays": "guitar"}),
            has_entries({"_count": is_(1), "plays": "bass"}),
            has_entries({"_count": is_(1), "plays": "drums"}),
        ))

    def _setup_hyphenated_documents(self):
        # Four documents whose collected field name contains a hyphen.
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

    def _setup_people(self):
        self.mongo_collection.save({"name": "George", "plays": "guitar"})
        self.mongo_collection.save({"name": "John", "plays": "guitar"})
        self.mongo_collection.save({"name": "Paul", "plays": "bass"})
        self.mongo_collection.save({"name": "Ringo", "plays": "drums"})

    def _setup_musical_instruments(self):
        self.mongo_collection.save(
            {"instrument": "flute", "type": "wind", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "contrabassoon", "type": "wind", "range": "low"})
        self.mongo_collection.save(
            {"instrument": "violin", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "viola", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "cello", "type": "string", "range": "low"})
class wsTicker(object):
    """Mirror the Poloniex ticker into the ``poloniex.ticker`` Mongo collection.

    The collection is dropped on construction, seeded from the REST
    ``returnTicker`` call when the websocket opens, and then updated from
    channel 1002 messages. Calling the instance reads the stored ticker.
    """

    # Ticker fields in the order they appear in a channel 1002 payload,
    # starting at payload index 1 (index 0 is the market id).
    _TICKER_FIELDS = ('last', 'lowestAsk', 'highestBid', 'percentChange',
                      'baseVolume', 'quoteVolume', 'isFrozen', 'high24hr',
                      'low24hr')

    def __init__(self, api=None):
        self.api = api
        if not self.api:
            self.api = Poloniex(jsonNums=float)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        self.ws = websocket.WebSocketApp("wss://api2.poloniex.com/",
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open
        # Set by start(); lets stop() be called safely beforehand.
        self.t = None

    def __call__(self, market=None):
        """ returns ticker from mongodb """
        if market:
            return self.db.find_one({'_id': market})
        return list(self.db.find())

    def on_message(self, ws, message):
        """Decode one websocket frame and store ticker updates in Mongo."""
        message = json.loads(message)
        if 'error' in message:
            print(message['error'])
            return

        if message[0] == 1002:
            if message[1] == 1:
                print('Subscribed to ticker')
                return

            if message[1] == 0:
                print('Unsubscribed to ticker')
                return

            data = message[2]
            # Explicit indexing keeps the IndexError on a short payload.
            fields = {name: data[position]
                      for position, name in enumerate(self._TICKER_FIELDS, 1)}
            self.db.update_one({"id": float(data[0])},
                               {"$set": fields},
                               upsert=True)

    def on_error(self, ws, error):
        print(error)

    def on_close(self, ws, *args):
        # websocket-client >= 1.0 also passes the close status code and
        # message; older releases pass only `ws`. Accept both.
        print("Websocket closed!")

    def on_open(self, ws):
        """Seed the collection from the REST ticker, then subscribe."""
        tick = self.api.returnTicker()
        for market in tick:
            self.db.update_one(
                {'_id': market},
                {'$set': tick[market]},
                upsert=True)
        print('Populated markets database with ticker data')
        self.ws.send(json.dumps({'command': 'subscribe',
                                 'channel': 1002}))

    def start(self):
        """Run the websocket loop in a daemon thread."""
        self.t = Thread(target=self.ws.run_forever)
        self.t.daemon = True
        self.t.start()
        print('Thread started')

    def stop(self):
        """Close the websocket and wait for the worker thread, if any."""
        self.ws.close()
        if self.t is not None:
            self.t.join()
        print('Thread joined')
class TestDataSetIntegration(unittest.TestCase):
    # Integration tests that run a DataSet against a real Mongo collection
    # (DB_NAME/DATA_SET); the collection is dropped after every test.

    def setUp(self):
        self.storage = MongoStorageEngine.create(HOSTS, PORT, DB_NAME)
        self.config = dict(
            name=DATA_SET,
            data_group="group",
            data_type="type",
            max_age_expected=1000,
        )
        self.data_set = DataSet(self.storage, self.config)
        client = MongoClient(HOSTS, PORT)
        self.mongo_collection = client[DB_NAME][DATA_SET]

    def setup__timestamp_data(self):
        # (_id, _timestamp, _week_start_at, _updated_at), saved in this order.
        fixtures = (
            ('last', d_tz(2013, 3, 1), d_tz(2013, 2, 25), d_tz(2013, 8, 10)),
            ('first', d_tz(2013, 1, 1), d_tz(2012, 12, 31), d_tz(2013, 9, 10)),
            ('second', d_tz(2013, 2, 1), d_tz(2013, 1, 28), d_tz(2013, 10, 10)),
        )
        for doc_id, timestamp, week_start, updated_at in fixtures:
            self.mongo_collection.save({
                "_id": doc_id,
                "_timestamp": timestamp,
                "_week_start_at": week_start,
                "_updated_at": updated_at,
            })

    def tearDown(self):
        self.mongo_collection.drop()

    def _save_record_updated_seconds_ago(self, seconds):
        # Store a single record whose _updated_at lies `seconds` in the past.
        age = datetime.timedelta(seconds=seconds)
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - age,
        })

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        weekly_query = Query.create(period=WEEK)
        rows = self.data_set.execute_query(weekly_query)
        expected_starts = [
            d_tz(2012, 12, 31),
            d_tz(2013, 1, 28),
            d_tz(2013, 2, 25),
        ]
        matchers = [has_entry('_start_at', start) for start in expected_starts]
        assert_that(rows, contains(*matchers))

    def test_data_set_is_recent_enough(self):
        self._save_record_updated_seconds_ago(500)
        assert_that(self.data_set.is_recent_enough())

    def test_data_set_is_not_recent_enough(self):
        self._save_record_updated_seconds_ago(50000)
        assert_that(not self.data_set.is_recent_enough())
class TestMongoDriver(unittest.TestCase):
    # Integration tests for MongoDriver against the Mongo collection
    # DB_NAME/BUCKET at HOST:PORT. The collection is dropped before each test.

    def setUp(self):
        # One client is enough: the driver under test and the assertions
        # look at the same collection.
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_driver = MongoDriver(self.mongo_collection)
        self.mongo_collection.drop()

    def test_save(self):
        thing_to_save = {'name': 'test_document'}
        another_thing_to_save = {'name': '2nd_test_document'}

        self.mongo_driver.save(thing_to_save)
        self.mongo_driver.save(another_thing_to_save)

        # Materialise the cursor: a live cursor can only be iterated once,
        # so the second assertion would otherwise depend on result order.
        results = list(self.mongo_collection.find())
        assert_that(results, has_item(thing_to_save))
        assert_that(results, has_item(another_thing_to_save))

    def test_save_updates_document_with_id(self):
        a_document = {"_id": "event1", "title": "I'm an event"}
        updated_document = {"_id": "event1", "title": "I'm another event"}

        self.mongo_driver.save(a_document)
        self.mongo_driver.save(updated_document)

        saved_documents = list(self.mongo_collection.find())
        assert_that(saved_documents, only_contains(updated_document))

    def test_find_one(self):
        self._setup_people()

        result = self.mongo_driver.find_one(query={"name": "George"})

        assert_that(result, has_entries({
            "name": "George",
            "plays": "guitar"
        }))

    def test_find(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "ascending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "George", "plays": "guitar"}),
            has_entries({"name": "John", "plays": "guitar"}),
        ))

    def test_find_sort_descending(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "descending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "John", "plays": "guitar"}),
            has_entries({"name": "George", "plays": "guitar"}),
        ))

    def test_find_with_limit(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": {"$ne": "guitar"}},
                                         sort=["name", "descending"],
                                         limit=1)

        assert_that(results, contains(
            has_entries({"name": "Ringo", "plays": "drums"})
        ))

    def test_group(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(2), "type": "wind"}),
            has_entries({"_count": is_(3), "type": "string"})
        ))

    def test_group_with_query(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={"range": "high"},
                                          collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(1), "type": "wind"}),
            has_entries({"_count": is_(2), "type": "string"})
        ))

    def test_group_and_collect_additional_properties(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=["range"])

        assert_that(results, contains(
            has_entries(
                {"_count": is_(2), "type": "wind",
                 "range": ["high", "low"]}),
            has_entries(
                {"_count": is_(3), "type": "string",
                 "range": ["high", "high", "low"]})
        ))

    def test_group_and_collect_with_hyphen_in_field_name(self):
        self._setup_hyphenated_documents()

        results = self.mongo_driver.group(keys=["type"], query={},
                                          collect_fields=["this-name"])

        assert_that(results, contains(
            has_entries(
                {"_count": is_(2), "type": "foo",
                 "this-name": ["bar", "bar"]}),
            has_entries(
                {"_count": is_(2), "type": "bar",
                 "this-name": ["bar", "foo"]})
        ))

    def test_group_and_collect_with_injection_attempt(self):
        self._setup_hyphenated_documents()

        # Field names containing a quote and a bracket must not change the
        # grouping result.
        for collect_field in ["name']-foo", "name\\']-foo"]:
            results = self.mongo_driver.group(keys=["type"], query={},
                                              collect_fields=[collect_field])

            assert_that(results, contains(
                has_entries(
                    {"_count": is_(2), "type": "foo"}),
                has_entries(
                    {"_count": is_(2), "type": "bar"})
            ))

    def test_group_and_collect_with_false_value(self):
        self.mongo_collection.save({"foo": "one", "bar": False})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "one", "bar": False})

        results = self.mongo_driver.group(["foo"], {}, ["bar"])

        assert_that(results, contains(
            has_entries({
                "bar": [False, False]
            }),
            has_entries({
                "bar": [True, True]
            })
        ))

    def test_group_without_keys(self):
        self._setup_people()

        results = self.mongo_driver.group(keys=[], query={},
                                          collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(4)}),
        ))

    # this responsibility does not belong here
    def test_group_ignores_documents_without_grouping_keys(self):
        self._setup_people()
        self.mongo_collection.save({"name": "Yoko"})

        results = self.mongo_driver.group(keys=["plays"], query={},
                                          collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(2), "plays": "guitar"}),
            has_entries({"_count": is_(1), "plays": "bass"}),
            has_entries({"_count": is_(1), "plays": "drums"}),
        ))

    def _setup_hyphenated_documents(self):
        # Four documents whose collected field name contains a hyphen.
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

    def _setup_people(self):
        self.mongo_collection.save({"name": "George", "plays": "guitar"})
        self.mongo_collection.save({"name": "John", "plays": "guitar"})
        self.mongo_collection.save({"name": "Paul", "plays": "bass"})
        self.mongo_collection.save({"name": "Ringo", "plays": "drums"})

    def _setup_musical_instruments(self):
        self.mongo_collection.save(
            {"instrument": "flute", "type": "wind", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "contrabassoon", "type": "wind", "range": "low"})
        self.mongo_collection.save(
            {"instrument": "violin", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "viola", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "cello", "type": "string", "range": "low"})
import json
from pymongo import MongoClient

# Script: load the parsed sonnets from disk, tag each one with its author,
# and rebuild the dustball_db.sonnets collection with a text index.

# The `with` block closes the file on exit; no explicit close is needed.
with open("Source_Material/poems_etc/poems_etc_parsed/the_sonnets.json", "r") as s:
    sonnets = json.load(s)

client = MongoClient()
# Use `client` itself, so the close() at the end releases the connection
# that was actually used, instead of leaking a second anonymous client.
sonnet_collection = client.dustball_db.sonnets


def add_author(s):
    """Set the author name fields on sonnet dict `s` in place; return `s`."""
    s["author_first_name"] = "william"
    s["author_last_name"] = "shakespeare"
    return s


try:
    sonnet_collection.drop()
    prepped_sonnets = list(map(add_author, sonnets))
    # insert_many rejects an empty document list.
    if prepped_sonnets:
        sonnet_collection.insert_many(prepped_sonnets)
    sonnet_collection.create_index([("text", "text")])
finally:
    client.close()
class TickerGenerator(object):
    """Mirror the Poloniex ticker into MongoDB and report status to Slack.

    The ``poloniex.ticker`` collection is dropped on construction, seeded
    from the REST ticker when the websocket opens, and updated from channel
    1002 messages. `monitor` raises a Slack alert when updates stop.
    """

    def __init__(self, slack_info, mongo_ip):
        """Connect to Mongo at `mongo_ip` and prepare the websocket app.

        `slack_info` must provide ``client`` and
        ``channels['alerts'][1]`` / ``channels['exceptions'][1]``.
        """
        self.api = Poloniex()
        # Use the constructor argument; the name `mongo_uri` referenced
        # here before is not defined by this class.
        self.db = MongoClient(mongo_ip).poloniex['ticker']
        self.db.drop()
        self.ws = websocket.WebSocketApp("wss://api2.poloniex.com/",
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open
        self.slack_client = slack_info['client']
        self.slack_channel_id_alerts = slack_info['channels']['alerts'][1]
        self.slack_channel_id_exceptions = slack_info['channels'][
            'exceptions'][1]
        # time.time() of the most recent ticker update; None until one arrives.
        self.last_update = None
        # Set by start(); lets stop() be called safely beforehand.
        self.t = None

    def __call__(self, market=None):
        """Return the stored ticker for `market`, or all markets as a list."""
        if market:
            return self.db.find_one({'_id': market})
        return list(self.db.find())

    def _alert(self, message):
        """Post `message` to the alerts channel and log the Slack response."""
        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts, message=message)
        logger.debug('slack_return: ' + str(slack_return))

    def on_message(self, ws, message):
        """Decode one websocket frame and store ticker updates in Mongo."""
        message = json.loads(message)
        if 'error' in message:
            logger.error(message['error'])
            # SEND SLACK EXCEPTION MESSAGE HERE
            return

        if message[0] == 1002:
            if message[1] == 1:
                logger.debug('Subscribed to ticker.')
                return

            if message[1] == 0:
                logger.debug('Unsubscribed from ticker.')
                return

            data = message[2]
            self.db.update_one({"id": float(data[0])}, {
                "$set": {
                    'last': float(data[1]),
                    'lowestAsk': float(data[2]),
                    'highestBid': float(data[3]),
                    'percentChange': float(data[4]),
                    'baseVolume': float(data[5]),
                    'quoteVolume': float(data[6]),
                    'isFrozen': float(data[7]),
                    'high24hr': float(data[8]),
                    'low24hr': float(data[9])
                }
            }, upsert=True)
            self.last_update = time.time()

    def on_error(self, ws, error):
        logger.error(error)
        slack_message = 'Error returned from websocket connection:\n'
        slack_message += str(error)
        self._alert(slack_message)

    def on_close(self, ws, *args):
        # websocket-client >= 1.0 also passes the close status code and
        # message; older releases pass only `ws`. Accept both.
        logger.debug('Websocket closed.')
        self._alert('Websocket closed.')

    def on_open(self, ws):
        """Seed the collection from the REST ticker, then subscribe."""
        tick = self.api.returnTicker()
        for market in tick:
            self.db.update_one({'_id': market}, {'$set': tick[market]},
                               upsert=True)
        logger.debug(
            'Populated markets database with ticker data from REST API.')
        self._alert('MongoDB populated with REST API market ticker data.')

        self.ws.send(json.dumps({'command': 'subscribe', 'channel': 1002}))
        self._alert('Subscribed to ticker websocket.')

    def start(self):
        """Run the websocket loop in a daemon thread."""
        self.t = Thread(target=self.ws.run_forever)
        self.t.daemon = True
        self.t.start()
        logger.debug('Thread started.')
        self._alert('\n*_Ticker startup initialized at ' + str(
            datetime.datetime.now()) + '._*\n\n')

    def stop(self):
        """Close the websocket and wait for the worker thread, if any."""
        self.ws.close()
        if self.t is not None:
            self.t.join()
        logger.debug('Thread joined.')
        self._alert('*TICKER SHUTDOWN COMPLETED AT ' + str(
            datetime.datetime.now()) + '.*')

    def monitor(self, timeout, alert_reset_interval=10):
        """Block, alerting on Slack when no update arrives for `timeout` seconds.

        After an alert, further alerts are suppressed for
        `alert_reset_interval` minutes. Returns on KeyboardInterrupt.
        """
        error_timeout = timeout
        error_message_sent = False
        error_message_time = None
        error_message_reset = datetime.timedelta(minutes=alert_reset_interval)
        # Baseline used until the first update arrives, so a feed that never
        # delivers data still triggers the alert.
        monitor_started = time.time()

        self._alert('*MONITOR ACTIVE. TICKER READY FOR USE.*')

        while True:
            try:
                last_seen = self.last_update
                if last_seen is None:
                    last_seen = monitor_started
                if (time.time() - last_seen) > error_timeout:
                    if not error_message_sent:
                        # NOTE(review): the text says 30 seconds whatever
                        # `timeout` is; kept verbatim.
                        error_message = '*NO TICKER DATA RECEIVED IN 30 SECONDS. AN ERROR HAS OCCURRED THAT REQUIRES IMMEDIATE ATTENTION.*'
                        self._alert(error_message)
                        error_message_sent = True
                        error_message_time = datetime.datetime.now()

                if error_message_sent and (
                        datetime.datetime.now() -
                        error_message_time) > error_message_reset:
                    logger.info(
                        'Resetting error message sent switch to allow another alert.'
                    )
                    error_message_sent = False

                time.sleep(1)

            except KeyboardInterrupt:
                logger.info(
                    'Exit signal raised in TickerGenerator.monitor. Breaking from monitor loop.'
                )
                break

            except Exception:
                logger.exception('Exception in inner loop.')
                # Avoid a busy loop when the failure happens before the sleep.
                time.sleep(1)

    def send_slack_alert(self, channel_id, message):
        """Post `message` to `channel_id`.

        Returns a dict with ``'result'`` (the Slack API response) and
        ``'Exception'`` (True when the call raised an Exception).
        """
        alert_return = {'Exception': False, 'result': {}}
        try:
            alert_return['result'] = self.slack_client.api_call(
                'chat.postMessage',
                channel=channel_id,
                text=message,
                username=slack_bot_user,
                icon_url=slack_bot_icon)
        except Exception:
            logger.exception(
                'Exception raised in TickerGenerator.send_slack_alert().')
            alert_return['Exception'] = True
        # Returned after the try block: a `return` inside `finally` would
        # also swallow KeyboardInterrupt and SystemExit.
        return alert_return
import csv
import re
from bson import SON
from pymongo import MongoClient
import sys

# Script: rebuild the test.cafes collection from 'sidewalk-cafes.csv'.
collection = MongoClient().test.cafes
print('Dropping test.cafes collection')
collection.drop()

# Pattern matching simple decimal numbers.
float_pat = r'-?[0-9]+(\.[0-9]+)?'

# Pattern matching addresses, which are like:
# '123 MAIN ST (40.73, -73.98)'
# The address part may span several lines and is followed by a newline
# before the parenthesised "(lat, lon)" pair.
location_pat = re.compile(
    r'(?P<address>(.|\n)+)\n\((?P<lat>%s), (?P<lon>%s)' % (
        float_pat, float_pat),
    re.MULTILINE)

# NOTE(review): the file handle opened here is never closed explicitly.
csv_file = csv.DictReader(open('sidewalk-cafes.csv'))
n_lines = 0
batch = []
for line in csv_file:
    # The 'Location 1' column is removed from the row and parsed separately.
    location_field = line.pop('Location 1')
    match = location_pat.match(location_field)
    # Abort on any row whose location does not match the expected layout.
    assert match, repr(location_field)
    group_dict = match.groupdict()
    # Note the order: longitude first, then latitude.
    lon, lat = float(group_dict['lon']), float(group_dict['lat'])
newsApi = NewsApiClient(api_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXx') ## Get the top 5 Named Entities namedEntityCol = MongoClient().precogTask.namedEntityCollection col = namedEntityCol.find() count = 0 # wordList used to store documents sirectly from namedEntityCollection wordList = [] for k in sorted(col, key=lambda k: len(k['tweet']), reverse=True): count += 1 if count > 5: break wordList.append(k) dataForEntityAnalysis = MongoClient().precogTask.dataForAnalysis dataForEntityAnalysis.drop() for ne in wordList: word = ne['word'] tweets = ne['tweet'] articleLinks = [] allNews = newsApi.get_everything( q=word, language='en', sort_by='relevancy', ) articles = allNews['articles'] print("----------------------------------------------") print(word) for article in articles:
perfcl.update({"_id":tid},{"$set":t}) appl = data["appl"] t = aiodb.find_one({"key":key,"appl":appl}) if not t: meta = {"key":key,"appl":appl,"order":index} aiodb.insert(meta) else: if t["order"] < index: _id = t["_id"] aiodb.update({"_id":_id},{"$set":{"order":index}}) print "all in one update ", index if __name__ == "__main__": print datetime.now(),"start dedupe" st = time.time() aiodb.drop() for i in db.find().sort("index",-1): try: i["entry_time"] except: continue filename = i["file"] index = i["index"] try: home_date = config.get("base",filename) except: print "%s not found homedate" % filename continue home_date = datetime.strptime(home_date,r"%Y/%m/%d") exec("mailcl = rpdb.mail%s.mail" % index) exec("perfcl = rpdb.mail%s.perf" % index)
class TestDataSetIntegration(unittest.TestCase):
    # Integration tests that run a DataSet against a real Mongo collection
    # (DB_NAME/DATA_SET); the collection is dropped after every test.

    def setUp(self):
        self.storage = MongoStorageEngine.create(HOSTS, PORT, DB_NAME)
        self.config = dict(
            name=DATA_SET,
            data_group="group",
            data_type="type",
            max_age_expected=1000,
        )
        self.data_set = DataSet(self.storage, self.config)
        client = MongoClient(HOSTS, PORT)
        self.mongo_collection = client[DB_NAME][DATA_SET]

    def setup__timestamp_data(self):
        # (_id, _timestamp, _week_start_at, _updated_at), saved in this order.
        fixtures = (
            ('last', d_tz(2013, 3, 1), d_tz(2013, 2, 25), d_tz(2013, 8, 10)),
            ('first', d_tz(2013, 1, 1), d_tz(2012, 12, 31), d_tz(2013, 9, 10)),
            ('second', d_tz(2013, 2, 1), d_tz(2013, 1, 28), d_tz(2013, 10, 10)),
        )
        for doc_id, timestamp, week_start, updated_at in fixtures:
            self.mongo_collection.save({
                "_id": doc_id,
                "_timestamp": timestamp,
                "_week_start_at": week_start,
                "_updated_at": updated_at,
            })

    def tearDown(self):
        self.mongo_collection.drop()

    def _save_record_updated_seconds_ago(self, seconds):
        # Store a single record whose _updated_at lies `seconds` in the past.
        age = datetime.timedelta(seconds=seconds)
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - age,
        })

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        weekly_query = Query.create(period=WEEK)
        rows = self.data_set.execute_query(weekly_query)
        expected_starts = [
            d_tz(2012, 12, 31),
            d_tz(2013, 1, 28),
            d_tz(2013, 2, 25),
        ]
        matchers = [has_entry('_start_at', start) for start in expected_starts]
        assert_that(rows, contains(*matchers))

    def test_data_set_is_recent_enough(self):
        self._save_record_updated_seconds_ago(500)
        assert_that(self.data_set.is_recent_enough())

    def test_data_set_is_not_recent_enough(self):
        self._save_record_updated_seconds_ago(50000)
        assert_that(not self.data_set.is_recent_enough())
class EventsTestMixin(TestCase):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def reset_event_tracking(self):
        """Drop any events that have been collected thus far and start collecting again from scratch."""
        self.event_collection.drop()
        # NOTE(review): this is local time while capture_events uses
        # datetime.utcnow(); confirm which clock the stored event "time"
        # values follow.
        self.start_time = datetime.now()

    @contextmanager
    def capture_events(self, event_filter=None, number_of_matches=1, captured_events=None):
        """
        Context manager that captures all events emitted while executing a particular block.

        All captured events are appended, in emission order and *in place*, to the list referenced by
        `captured_events`.

        `event_filter` is either a callable taking an event and returning a boolean, or a dictionary the event
        is expected to match.

        `number_of_matches` is a lower bound: the context manager does not exit until at least this many events
        have passed the filter. If that does not happen before the timeout expires, a `BrokenPromise` error is
        raised.
        """
        start_time = datetime.utcnow()

        yield

        events = self.wait_for_events(
            start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches)

        # Only collect into objects that offer a callable `append`.
        if captured_events is not None and callable(getattr(captured_events, 'append', None)):
            for event in events:
                captured_events.append(event)

    @contextmanager
    def assert_events_match_during(self, event_filter=None, expected_events=None):
        """
        Context manager that ensures that events matching the `event_filter` and `expected_events` are emitted.

        The event stream is filtered with `event_filter` until `len(expected_events)` events have matched. The
        captured events are then compared, in order, with their counterparts in `expected_events`.

        Typically `event_filter` will be an `event_type` filter and the `expected_events` list will contain more
        detailed assertions.
        """
        captured_events = []
        with self.capture_events(event_filter, len(expected_events), captured_events):
            yield

        self.assert_events_match(expected_events, captured_events)

    def wait_for_events(self, start_time=None, event_filter=None, number_of_matches=1, timeout=None):
        """
        Wait for `number_of_matches` events to pass the `event_filter`.

        By default all events emitted since the setup of this mixin are considered; a custom `start_time` limits
        the search to events emitted at or after that time.

        `event_filter` is either a callable taking an event and returning a boolean, or a dictionary the event
        is expected to match.

        The function does not return until at least `number_of_matches` events have passed the filter. If that
        does not happen before `timeout` seconds (30 by default), a `BrokenPromise` error is raised.
        """
        if start_time is None:
            start_time = self.start_time

        if timeout is None:
            timeout = 30

        def check_for_matching_events():
            """Gather any events that have been emitted since `start_time`"""
            return self.matching_events_were_emitted(
                start_time=start_time,
                event_filter=event_filter,
                number_of_matches=number_of_matches
            )

        return Promise(
            check_for_matching_events,
            # This is a bit of a hack, Promise calls str(description), so I set the description to an object with a
            # custom __str__ and have it do some intelligent stuff to generate a helpful error message.
            CollectedEventsDescription(
                'Waiting for {number_of_matches} events to match the filter:\n{event_filter}'.format(
                    number_of_matches=number_of_matches,
                    event_filter=self.event_filter_to_descriptive_string(event_filter),
                ),
                functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
            ),
            timeout=timeout
        ).fulfill()

    def matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """
        Return a `(enough, events)` tuple: `enough` is True if at least `number_of_matches` events have passed
        the `event_filter` since `start_time`, and `events` is the list of matching events.
        """
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)
        return len(matching_events) >= number_of_matches, matching_events

    def get_matching_events_from_time(self, start_time=None, event_filter=None):
        """
        Return a list of events that pass the `event_filter` and were emitted at or after `start_time`.

        This function is used internally by most of the other assertions and convenience methods in this class.

        `event_filter` is either a callable taking an event and returning a boolean, or a dictionary the event
        is expected to match. Anything else raises `ValueError`. A filter may also use `assert`: an
        `AssertionError` rejects the event, and a `None` return value accepts it.
        """
        if start_time is None:
            start_time = self.start_time

        if isinstance(event_filter, dict):
            event_filter = functools.partial(is_matching_event, event_filter)
        elif not callable(event_filter):
            raise ValueError(
                'event_filter must either be a dict or a callable function with as single "event" parameter that '
                'returns a boolean value.'
            )

        matching_events = []
        cursor = self.event_collection.find(
            {
                "time": {
                    "$gte": start_time
                }
            }
        ).sort("time", ASCENDING)
        for event in cursor:
            try:
                # Mongo automatically assigns an _id to all events inserted into it. We strip it out here, since
                # we don't care about it.
                del event['_id']
                # Typically we will be grabbing all events of a particular type, however, you can use arbitrary
                # logic to identify the events that are of interest.
                matches = event_filter(event)
            except AssertionError:
                # allow the filters to use "assert" to filter out events
                continue
            if matches is None or matches:
                matching_events.append(event)
        return matching_events

    def assert_matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """Assert that at least `number_of_matches` events have passed the filter since `start_time`."""
        description = CollectedEventsDescription(
            'Not enough events match the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
        )

        # matching_events_were_emitted returns a (bool, events) tuple. Assert on the boolean only: the tuple
        # itself is always truthy, so asserting on it could never fail.
        enough_events, _ = self.matching_events_were_emitted(
            start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches
        )
        self.assertTrue(enough_events, description)

    def assert_no_matching_events_were_emitted(self, event_filter, start_time=None):
        """Assert that no events have passed the filter since `start_time`."""
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)

        description = CollectedEventsDescription(
            'Events unexpected matched the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            lambda: matching_events
        )

        self.assertEqual(len(matching_events), 0, description)

    def assert_events_match(self, expected_events, actual_events):
        """
        Assert that each item in the expected events sequence matches its counterpart at the same index in the
        actual events sequence. Comparison stops at the end of the shorter sequence.
        """
        for expected_event, actual_event in zip(expected_events, actual_events):
            assert_event_matches(
                expected_event,
                actual_event,
                tolerate=EventMatchTolerates.lenient()
            )

    def relative_path_to_absolute_uri(self, relative_path):
        """Return an absolute URI given a relative path taking into account the test context."""
        return urlparse.urljoin(BASE_URL, relative_path)

    def event_filter_to_descriptive_string(self, event_filter):
        """Find the source code of the callable or pretty-print the dictionary"""
        message = ''
        if callable(event_filter):
            file_name = '(unknown)'
            try:
                file_name = inspect.getsourcefile(event_filter)
            except TypeError:
                pass

            try:
                list_of_source_lines, line_no = inspect.getsourcelines(event_filter)
            except (IOError, TypeError):
                # No source available: missing file, or a builtin / callable
                # object that inspect cannot resolve. Fall back to pprint.
                pass
            else:
                message = '{file_name}:{line_no}\n{hr}\n{event_filter}\n{hr}'.format(
                    event_filter=''.join(list_of_source_lines).rstrip(),
                    file_name=file_name,
                    line_no=line_no,
                    hr='-' * 20,
                )

        if not message:
            message = '{hr}\n{event_filter}\n{hr}'.format(
                event_filter=pprint.pformat(event_filter),
                hr='-' * 20,
            )

        return message
# =========================================================================== # Set up data structures text_processor_queue = queue.Queue(BUF_SIZE) db = MongoClient()[db][collection] model_queue = queue.Queue(1) annot_resp = queue.Queue(1) te = threading.Event() d = corpora.Dictionary() mif = queue.Queue(1) keyword_queue = queue.Queue(BUF_SIZE) lim_queue = queue.Queue(BUF_SIZE) mess_queue = queue.Queue(BUF_SIZE) # Clear database db.drop() # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s (%(threadName)s) %(message)s', filename='debug.log') logging.info('\n' * 5) logging.info('*' * 10 + 'ACTIVE STREAM' + '*' * 10) logging.info('Starting Application...') #for key in logging.Logger.manager.loggerDict: # logging.getLogger(key).setLevel(logging.CRITICAL) # Initialize Threads streamer = Streamer(name='Streamer',
class EventsTestMixin(TestCase):
    """
    Helpers and setup for running tests that evaluate events emitted.

    Events are read back from the Mongo collection ``test.events`` and are
    selected with an `event_filter`, which is either a dict (matched through
    `is_matching_event`) or a callable taking a single event.
    """

    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def reset_event_tracking(self):
        """Drop any events that have been collected thus far and start collecting again from scratch."""
        self.event_collection.drop()
        self.start_time = datetime.now()

    @contextmanager
    def capture_events(self, event_filter=None, number_of_matches=1, captured_events=None):
        """
        Context manager that captures all events emitted while executing a particular block.

        All captured events are stored in the list referenced by `captured_events`. Note that this list is appended to
        *in place*. The events will be appended to the list in the order they are emitted.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match that provided expectation.

        `number_of_matches` tells this context manager when enough events have been found and it can move on. The
        context manager will not exit until this many events have passed the filter. If not enough events are found
        before a timeout expires, then this will raise a `BrokenPromise` error. Note that this simply states that
        *at least* this many events have been emitted, so `number_of_matches` is simply a lower bound for the size of
        `captured_events`.
        """
        # NOTE(review): this uses utcnow() while reset_event_tracking() uses now(); confirm which clock the
        # stored event "time" field is expressed in.
        start_time = datetime.utcnow()

        yield

        events = self.wait_for_events(
            start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches)

        # Only list-like targets (anything exposing a callable `append`) receive the events.
        if captured_events is not None and hasattr(captured_events, 'append') and callable(captured_events.append):
            for event in events:
                captured_events.append(event)

    @contextmanager
    def assert_events_match_during(self, event_filter=None, expected_events=None):
        """
        Context manager that ensures that events matching the `event_filter` and `expected_events` are emitted.

        This context manager will filter out the event stream using the `event_filter` and wait for
        `len(expected_events)` to match the filter.

        It will then compare the events in order with their counterpart in `expected_events` to ensure they match the
        more detailed assertion.

        Typically `event_filter` will be an `event_type` filter and the `expected_events` list will contain more
        detailed assertions.
        """
        captured_events = []
        with self.capture_events(event_filter, len(expected_events), captured_events):
            yield

        self.assert_events_match(expected_events, captured_events)

    def wait_for_events(self, start_time=None, event_filter=None, number_of_matches=1, timeout=None):
        """
        Wait for `number_of_matches` events to pass the `event_filter`.

        By default, this will look at all events that have been emitted since the beginning of the setup of this mixin.
        A custom `start_time` can be specified which will limit the events searched to only those emitted after that
        time.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match that provided expectation.

        `number_of_matches` lets us know when enough events have been found and it can move on. The function will not
        return until this many events have passed the filter. If not enough events are found before a timeout expires,
        then this will raise a `BrokenPromise` error. Note that this simply states that *at least* this many events have
        been emitted, so `number_of_matches` is simply a lower bound for the size of `captured_events`.

        Specifying a custom `timeout` can allow you to extend the default 30 second timeout if necessary.
        """
        if start_time is None:
            start_time = self.start_time

        if timeout is None:
            timeout = 30

        def check_for_matching_events():
            """Gather any events that have been emitted since `start_time`"""
            return self.matching_events_were_emitted(
                start_time=start_time,
                event_filter=event_filter,
                number_of_matches=number_of_matches
            )

        return Promise(
            check_for_matching_events,
            # This is a bit of a hack, Promise calls str(description), so I set the description to an object with a
            # custom __str__ and have it do some intelligent stuff to generate a helpful error message.
            CollectedEventsDescription(
                'Waiting for {number_of_matches} events to match the filter:\n{event_filter}'.format(
                    number_of_matches=number_of_matches,
                    event_filter=self.event_filter_to_descriptive_string(event_filter),
                ),
                functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
            ),
            timeout=timeout
        ).fulfill()

    def matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """
        Check whether enough events have passed the `event_filter` since `start_time`.

        Returns a `(enough, matching_events)` tuple: `enough` is True when at least `number_of_matches` events
        matched, and `matching_events` is the list of events that matched. Because the result is a tuple it is
        always truthy; callers must inspect the first element.
        """
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)
        return len(matching_events) >= number_of_matches, matching_events

    def get_matching_events_from_time(self, start_time=None, event_filter=None):
        """
        Return a list of events that pass the `event_filter` and were emitted after `start_time`.

        This function is used internally by most of the other assertions and convenience methods in this class.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match that provided expectation.

        Raises `ValueError` when `event_filter` is neither a dict nor a callable (this includes the default `None`).
        """
        if start_time is None:
            start_time = self.start_time

        if isinstance(event_filter, dict):
            event_filter = functools.partial(is_matching_event, event_filter)
        elif not callable(event_filter):
            raise ValueError(
                'event_filter must either be a dict or a callable function with as single "event" parameter that '
                'returns a boolean value.'
            )

        matching_events = []
        cursor = self.event_collection.find(
            {
                "time": {
                    "$gte": start_time
                }
            }
        ).sort("time", ASCENDING)
        for event in cursor:
            # Mongo automatically assigns an _id to all events inserted into it. We strip it out here, since
            # we don't care about it.
            del event['_id']
            try:
                # Typically we will be grabbing all events of a particular type, however, you can use arbitrary
                # logic to identify the events that are of interest.
                matches = event_filter(event)
            except AssertionError:
                # allow the filters to use "assert" to filter out events
                continue
            # A filter that returns nothing (e.g. one built purely from asserts) counts as a match.
            if matches is None or matches:
                matching_events.append(event)
        return matching_events

    def assert_matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """Assert that at least `number_of_matches` events have passed the filter since `start_time`."""
        description = CollectedEventsDescription(
            'Not enough events match the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
        )

        # matching_events_were_emitted returns a (bool, events) tuple; asserting on the tuple itself would
        # always pass, so only the boolean is checked.
        enough_matches, _ = self.matching_events_were_emitted(
            start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches
        )
        self.assertTrue(enough_matches, description)

    def assert_no_matching_events_were_emitted(self, event_filter, start_time=None):
        """Assert that no events have passed the filter since `start_time`."""
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)

        description = CollectedEventsDescription(
            'Events unexpected matched the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            lambda: matching_events
        )

        self.assertEqual(len(matching_events), 0, description)

    def assert_events_match(self, expected_events, actual_events):
        """
        Assert that each item in the expected events sequence matches its counterpart at the same index in the actual
        events sequence.

        The sequences are paired with `zip`, so surplus items in the longer sequence are not compared.
        """
        for expected_event, actual_event in zip(expected_events, actual_events):
            assert_event_matches(
                expected_event,
                actual_event,
                tolerate=EventMatchTolerates.lenient()
            )

    def relative_path_to_absolute_uri(self, relative_path):
        """Return an absolute URI given a relative path taking into account the test context."""
        return urlparse.urljoin(BASE_URL, relative_path)

    def event_filter_to_descriptive_string(self, event_filter):
        """
        Find the source code of the callable or pretty-print the dictionary.

        Falls back to the pretty-printed representation whenever the source of a callable cannot be retrieved.
        """
        message = ''
        if callable(event_filter):
            file_name = '(unknown)'
            try:
                file_name = inspect.getsourcefile(event_filter)
            except TypeError:
                pass

            try:
                list_of_source_lines, line_no = inspect.getsourcelines(event_filter)
            except (IOError, TypeError):
                # IOError: the source file is unavailable. TypeError: the callable has no Python source at all
                # (builtins and similar objects). Either way, use the pretty-printed fallback below.
                pass
            else:
                message = '{file_name}:{line_no}\n{hr}\n{event_filter}\n{hr}'.format(
                    event_filter=''.join(list_of_source_lines).rstrip(),
                    file_name=file_name,
                    line_no=line_no,
                    hr='-' * 20,
                )

        if not message:
            message = '{hr}\n{event_filter}\n{hr}'.format(
                event_filter=pprint.pformat(event_filter),
                hr='-' * 20,
            )

        return message
class ModelTestCase(AsyncTestCase):
    """
    Tests for `Model`, checked against the Mongo collection "database"/"collection".

    Each test gets a fresh `Model` plus a direct `MongoClient` handle on the same collection, which is
    used to seed documents and to verify what the model wrote. The collection is dropped after every test.
    """

    def setUp(self):
        super().setUp()
        self.database = "database"
        self.collection = "collection"
        self.model = Model(self.database, self.collection)
        # Direct handle on the backing collection, independent of the Model under test.
        self.client = MongoClient()[self.database][self.collection]

    def tearDown(self):
        try:
            self.client.drop()
        finally:
            # setUp chains to the base class, so tearDown must as well; otherwise the base
            # test case's own cleanup never runs.
            super().tearDown()

    def test_setup_database(self):
        self.assertEqual(self.database, self.model.database)

    def test_setup_collection(self):
        self.assertEqual(self.collection, self.model.collection)

    def test_setup_created(self):
        self.assertIsInstance(self.model.data['created'], datetime.datetime)

    def test_setup_updated(self):
        self.assertIsInstance(self.model.data['updated'], datetime.datetime)

    def test_should_not_override_created(self):
        created = datetime.datetime.now()
        model = Model(self.database, self.collection, data={'created': created})
        self.assertEqual(created, model.data['created'])

    def test_should_not_override_updated(self):
        updated = datetime.datetime.now()
        model = Model(self.database, self.collection, data={'updated': updated})
        self.assertEqual(updated, model.data['updated'])

    @gen_test
    async def test_find(self):
        # Seed a document directly, then load it back through Model.find.
        _id = self.client.insert_one({}).inserted_id
        model = await Model.find(self.database, self.collection, _id)
        self.assertEqual(_id, model.data['_id'])

    @gen_test
    async def test_insert(self):
        model = Model(self.database, self.collection)
        await model.insert()
        data = self.client.find_one({'_id': model.data['_id']})
        self.assertEqual(data, model.data)

    @gen_test
    async def test_update(self):
        _id = self.client.insert_one({}).inserted_id
        model = Model(self.database, self.collection, data={'_id': _id})
        # Snapshot the model data before the update so the expected document can be built from it.
        expected = model.data.copy()
        expected['name'] = 'batman'
        await model.update({'name': 'batman'})
        self.assertEqual(expected, self.client.find_one({'_id': _id}))

    @gen_test
    async def test_delete(self):
        _id = self.client.insert_one({}).inserted_id
        model = Model(self.database, self.collection, data={'_id': _id})
        await model.delete()
        # find_one returns None once the document is gone.
        self.assertFalse(self.client.find_one({'_id': _id}))
ret[key_val[0].strip().rstrip( )] = datetime.datetime.strptime(date, "%Y/%m/%d") else: ret[key_val[0].strip().rstrip().replace( '.', '_')] = key_val[1].rstrip().strip() cvs_arr_to_upload.append(convert2unicode(copy.deepcopy(ret))) if 'date' in ret: ret['date'] = temp_date return ret if __name__ == "__main__": file_name = '../' + sys.argv[1] + '.txt' if sys.argv[1] == 'PolicyCenter7.0': cvs_collection.drop() cvs_log_output = open(file_name).readlines() file_chunks = get_chunks(cvs_log_output) # Go through each file for chunk in file_chunks: # Get subset of file file_lines = cvs_log_output[chunk[0]:chunk[1]] # Go through each line of file for (i, line) in enumerate(file_lines): # A checkin appeared (cvs logs with '---' to break revision blocks) if (line.startswith('---')): s = i + 1 e = s # Get subset of description block to parse
def test_mongo_dump_and_restore(docker_container, tmp_path):
    """Round-trip documents through `mongo_utils.mongo_dump` and `mongo_utils.mongo_restore`.

    Phase 1 (first container): insert three documents into db1.col1, dump them to
    `tmp_path / 'dump1.tgz'`, and check the reported document count, the `count=False`
    option, and that a failing command surfaces as an exception.

    Phase 2 (second container): restore the dump into db2.col2 several times and check
    how pre-existing documents with the same `_id` are reported (`duplicated_ids`),
    that they are not overwritten, and that `drop=True` replaces the collection contents.

    :param docker_container: fixture used as a context manager yielding a running container
    :param tmp_path: temporary directory; holds the dump file and is passed as `appdir`
    """
    # Dummy data insertion
    docs = [
        {'name': 'col1doc1'},
        {'name': 'col1doc2'},
        {'name': 'col1doc3'},
    ]
    inserted_doc_ids = None
    # Host port mapped to the container's 27017/tcp below.
    port = 27020
    client = MongoClient(f'mongodb://localhost:{port}')
    # URI handed to the dump/restore helpers, which run through `docker exec` (see cmd_prefix).
    uri = 'mongodb://localhost/tmpdb'
    dump_path = str(tmp_path / 'dump1.tgz')

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)
        cmd_prefix = f'docker exec -i {container.id} '
        inserted_doc_ids = client.db1['col1'].insert_many(docs).inserted_ids

        # Get a dump after inserting the documents
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1') as (stream, stats):  # noqa: E501
            with open(dump_path, 'wb') as fp:
                fp.write(stream.read())
        assert stats.num_docs == 3

        # Doesn't count the number of docs if requested not to
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1', count=False) as (_, stats):  # noqa: E501
            pass
        assert not stats.num_docs

        # Test if a dummy falsey command throws
        with pytest.raises(Exception) as exc:
            with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix + ' false ', uri=uri, collection='col1', db='db1') as _:  # noqa: E501
                pass
        # NOTE(review): `exc` is the object bound by pytest.raises, not the exception itself;
        # confirm str(exc) still carries the message on the pytest version in use
        # (str(exc.value) is the explicit form).
        assert re.search('exited with error code', str(exc))

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)

        def restore_dump(**kwargs):
            # Re-open the dump for every call so each restore reads it from the start.
            with open(dump_path, 'rb') as fp:
                return mongo_utils.mongo_restore(
                    stream=fp,
                    cmd_prefix=f'docker exec -i {container.id} ',
                    uri=uri,
                    collection='col2',
                    db='db2',
                    **kwargs,
                )

        # Insert one document and check if it wasn't overwritten
        col = MongoClient(f'mongodb://localhost:{port}').db2['col2']
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'test',
        })
        stats = restore_dump()
        # The pre-existing document keeps its name 'test'; only the other two are restored.
        assert {d['name'] for d in col.find()} == {
            'test',
            'col1doc2',
            'col1doc3',
        }
        assert stats.num_docs == 2

        # Checking if duplicated docs are properly returned
        col.drop()
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'new doc 1',
        })
        col.insert_one({'name': 'new doc 2'})
        stats = restore_dump()
        assert stats.duplicated_ids == [inserted_doc_ids[0]]
        # Remove the conflicting document, then restore again: only that one document is new,
        # and the two restored by the previous call are now the duplicates.
        r = col.delete_many({'_id': {'$in': stats.duplicated_ids}})
        LOGGER.warning(r.raw_result)
        stats = restore_dump()
        assert stats.num_docs == 1
        assert set(stats.duplicated_ids) == set(inserted_doc_ids[1:])
        assert {d['name'] for d in col.find()} == {
            'col1doc1',
            'new doc 2',
            'col1doc2',
            'col1doc3',
        }

        # Now drop the collection
        col.insert_one({'name': 'new doc'})
        stats = restore_dump(drop=True)
        # With drop=True only the three dumped documents remain.
        assert {d['name'] for d in col.find()} == {
            'col1doc1',
            'col1doc2',
            'col1doc3',
        }
        assert stats.num_docs == 3
class EventsTestMixin(object):
    """
    Helpers and setup for running tests that evaluate events emitted.

    This is a mixin: it relies on the host test class for `assertEqual` / `assertTrue`
    and reads events back from the Mongo collection ``test.events``.
    """

    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def assert_event_emitted_num_times(self, event_name, event_time, event_user_id, num_times_emitted, **kwargs):
        """
        Tests the number of times a particular event was emitted.

        Extra kwargs get passed to the mongo query in the form: "event.<key>: value".

        :param event_name: Expected event name (e.g., "edx.course.enrollment.activated")
        :param event_time: Latest expected time, after which the event would fire (e.g., the beginning of the test case)
        :param event_user_id: user_id expected in the event
        :param num_times_emitted: number of times the event is expected to appear since the event_time
        """
        find_kwargs = {
            "name": event_name,
            "time": {"$gt": event_time},
            "event.user_id": int(event_user_id),
        }
        find_kwargs.update({"event.{}".format(key): value for key, value in kwargs.items()})

        # Materialize the result once: the same documents are counted and then listed in the failure
        # message, and this avoids Cursor.count(), which newer PyMongo releases no longer provide.
        matching_events = list(self.event_collection.find(find_kwargs))
        self.assertEqual(
            len(matching_events),
            num_times_emitted,
            '\n'.join(str(event) for event in matching_events)
        )

    def reset_event_tracking(self):
        """
        Resets all event tracking so that previously captured events are removed.
        """
        self.event_collection.drop()
        self.start_time = datetime.now()

    def get_matching_events(self, username, event_type):
        """
        Returns a cursor for the matching browser events related emitted for the specified username.

        Only events recorded after `self.start_time` are returned.
        """
        return self.event_collection.find({
            "username": username,
            "event_type": event_type,
            "time": {"$gt": self.start_time},
        })

    def verify_events_of_type(self, username, event_type, expected_events, expected_referers=None):
        """Verify that the expected events of a given type were logged.

        Args:
            username (str): The name of the user for which events will be tested.
            event_type (str): The type of event to be verified.
            expected_events (list): A list of dicts representing the events that should
                have been fired.
            expected_referers (list): A list of strings representing the referers for each
                event that should been fired (optional). If present, the actual referers
                compared with this list, checking that the expected_referers are the suffixes of
                actual_referers. For example, if one event is expected, specifying ["/account/settings"] will
                verify that the referer for the single event ends with "/account/settings".
        """
        # Poll until at least as many events as expected have been recorded.
        EmptyPromise(
            lambda: len(list(self.get_matching_events(username, event_type))) >= len(expected_events),
            "Waiting for the minimum number of events of type {type} to have been recorded".format(type=event_type)
        ).fulfill()

        # Verify that the correct events were fired
        actual_events = []
        actual_referers = []
        # Iterate the cursor directly rather than pairing a separate count with next(): a count taken
        # at a different moment may not agree with what the cursor actually yields.
        for emitted_data in self.get_matching_events(username, event_type):
            event = emitted_data["event"]
            # Browser-sourced events carry their payload as a JSON string.
            if emitted_data["event_source"] == "browser":
                event = json.loads(event)
            actual_events.append(event)
            actual_referers.append(emitted_data["referer"])
        self.assertEqual(expected_events, actual_events)
        if expected_referers is not None:
            self.assertEqual(len(expected_referers), len(actual_referers), "Number of expected referers is incorrect")
            # The lengths were just asserted equal, so zip pairs every referer with its expectation.
            for actual_referer, expected_referer in zip(actual_referers, expected_referers):
                self.assertTrue(
                    actual_referer.endswith(expected_referer),
                    "Refer '{0}' does not have correct suffix, '{1}'.".format(actual_referer, expected_referer)
                )
# -*- coding: utf-8 -*- """ Importazione del database dei ponti italiani. Pubblicati da IK2ANE in formato Excel, salvare sul file pontixls.csv. Rimuovere la prima riga (intestazione) e alcune delle ultime righe del file. Vedere: http://www.ik2ane.it Autore: Fabio Pani [IZ2UQF] <*****@*****.**> Licenza d'uso: GNU/GPL v3 (vedere file LICENSE allegato) """ from pymongo import MongoClient, GEOSPHERE from csv import reader from wwl import is_valid_locator, convert_locator, get_longitude, get_latitude ponti = MongoClient().hamradio.ponti ponti.drop() # ricostruisce la collection da capo with open('pontixls.csv', 'rb') as f: csvfile = reader(f) for row in csvfile: doc = dict(nome=row[0].strip(), frequenza=row[1].strip(), shift=row[2].strip(), tono=row[3].strip(), regione=row[4].strip().lower(), provincia=row[5].strip().upper(), localita=row[6].strip(), gruppo=row[7].strip(), identificatore=row[8].strip(), traslatore=row[9].strip(), locator=row[10].strip(), gestore=row[15].strip())