def testUpdateDocument(self):
    ''' Updates/increments a specific hour.minute in a document. '''
    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=24)
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Update/increment a specific hour.minute
    ts.updateCount()

    # Run a query for this item
    outDocs = dbh.timeseries.find({'data.12.1': 1})
    for doc in outDocs:
        print doc
        self.assertEquals(doc['mgrs'], '38SND4595706622')

    # Close the connection
    mdb.close(c, dbh)

def testInsertBaselineDoc(self):
    ''' Inserts a completed baseline document into the baseline collection. '''
    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664, userID=4444, source='twitter')

    # Instantiate the baseline object/class
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)

    # Build the document and insert it
    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    res = dbh.baseline.find()[0]
    print res

    self.assertEquals(res['keyword'], 'keyword1')
    self.assertEquals(res['mgrs'], '38SND4595706622')
    self.assertEquals(res['mgrsPrecision'], 10)

    # Close the connection
    mdb.close(c, dbh)

def testMongoLookup(self):
    ''' Checks whether a timeseries document already exists for this period. '''
    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Check the count - should be 0 before the doc gets inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 0)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()

    # Count should be 1 now that the document has been inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 1)

    # Clean up, remove the content and close the connection
    #dbh.timeseries.remove()
    mdb.close(c, dbh)

def main():
    # Config file parameters
    pathIn = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/'
    fileIn = 'preAllocateTimeSeries.cfg'

    # Get parameters from config
    p = params(pathIn, fileIn)

    # Connect and get db and collection handle
    c, dbh = mdb.getHandle(p.host, p.port, p.db)
    collectionHandle = dbh[p.coll]

    # Current datetime
    #today = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    today = datetime.datetime(2011, 5, 1)

    # Build some blank data
    blankDataArr = buildBlankData()

    # A list to hold the timeseries we need to pre-allocate for.
    # Get pairs to be pre-allocated from yesterday - lookback is in days
    if 'yesterday' in p.baselineTypes:
        preAllocate, minCount, maxCount = getCommonMgrsAndKeyword(collectionHandle, p.mgrsPrecision,
                                                                  p.nothingFoundKeyword, today, lookback=1)

    # Now loop the keyword/mgrs pairs and build new timeseries documents for today
    for item in preAllocate:
        response = insertDoc(collectionHandle, item['mgrs'], p.mgrsPrecision, item['keyword'],
                             'twitter', today, buildBlankData())

    mdb.close(c, dbh)

def deleteIndex(type):
    sql = f"delete from idx where type = '{type}'"
    conn = mdb.connectAShare()
    mdb.execute(conn, sql)
    print(f"{type} deleted")
    mdb.close(conn)

def getInfoFromDb(exchange, code):
    sql = f"select * from info where exchange = '{exchange}' and code = '{code}'"
    conn = mdb.connectAShare()
    data = mdb.query(conn, sql)
    mdb.close(conn)
    return data

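# A minimal usage sketch for the lookup helper above. It assumes mdb.query()
# returns a pandas DataFrame (getDailyK below relies on the same behaviour)
# and that the example exchange/code pair exists in the local database.
info = getInfoFromDb('sh', '600000')
if info.empty:
    print('no matching row in info')
else:
    print(info.iloc[0]['name'], info.iloc[0]['ipo_date'])
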
def testlastBaselined(self):
    ''' Builds and inserts a baseline document, then checks the date of the
        last baseline calculation. '''
    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664, userID=4444, source='twitter')

    # Create a new baseline object
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)
    baseLine.outputs['days30_all'] = 0.5
    baseLine.outputs['days7_all'] = 0.4
    baseLine.outputs['hrs30_all'] = 0.3
    baseLine.outputs['days30_weekly'] = 0.2
    baseLine.outputs['days7_daily'] = 0.1

    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    # Method returns the date of the last baseline calculation
    lastBaseline = baseLine.lastBaselined()
    self.assertEquals(lastBaseline, datetime.datetime(2011, 6, 22, 12, 10))

    # Close the connection
    mdb.close(c, dbh)

def getExistingStocks():
    sql = "select distinct exchange, code from day_k"
    conn = mdb.connectAShare()
    rs = mdb.query(conn, sql)
    mdb.close(conn)
    return rs

def main(configFile, subscriptionType, source):
    ''' Coordinates the retrieval of public CCTV camera URLs for crowded. '''
    # Get the config information into a single object
    p = getConfigParameters(configFile)

    #////////////////////////////////////////////////////////
    if source == 'cctv':
        url = p.tflUrl
    elif source == 'youtube':
        url = p.socialiseUrl
    elif source == 'flickr':
        url = p.viewFinderUrl
    # Add more sources here and to the config file
    #////////////////////////////////////////////////////////

    # Mongo connection parameters
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    collHandle = dbh['subs']
    evCollHandle = dbh['events']

    # Get the active subs
    activeSubs = getActiveSubs(collHandle, type=subscriptionType)

    # Barf at this point if there's nothing in subs
    if not activeSubs or len(activeSubs) < 1:
        mdb.close(c, dbh)
        return None

    # For each active subscription, query by geography or tag
    for aSub in activeSubs:
        print 'ASUB:', aSub
        if subscriptionType == 'geography':
            lon, lat = aSub['loc']
            radius = float(aSub['radius'])
            media = queryByGeo(url, lat, lon, radius)
        elif subscriptionType == 'tag':
            tag = aSub['objectId']
            media = queryByTag(url, tag)

        # For each of the images, update the correct event url list
        for image in media:
            # Mod the datetime into a python dt, falling back to the published date
            try:
                img = datetime.datetime.strptime(image['captured'], "%Y-%m-%dT%H:%M:%S")
            except Exception, e:
                img = datetime.datetime.strptime(image['published'], "%Y-%m-%dT%H:%M:%S")
            image['dt'] = img

            success = updateEvents(evCollHandle, aSub['objectId'], image)
            if success == None:
                print "Failed to update event ID '%s' with media: \n %s" %(aSub['objectId'], image)

def persistIndex(type, data):
    sql = "insert into idx(type, exchange, code, name, update_date) values(?, ?, ?, ?, ?)"
    indexes = []
    for index, row in data.iterrows():
        indexes.append((type, row['code'][:2], row['code'][3:], row['code_name'], row['updateDate']))
    conn = mdb.connectAShare()
    mdb.executeMany(conn, sql, indexes)
    mdb.close(conn)
    print(f"{len(data)} {type} persisted")

def getDailyK(exchange, code, startDate=None, endDate=None):
    sql = f"select date, open, close, high, low, volume from day_k where exchange = '{exchange}' and code = '{code}'"
    if startDate is not None:
        sql += f" and date >= '{startDate}'"
    if endDate is not None:
        sql += f" and date <= '{endDate}'"
    sql += " order by date"
    conn = mdb.connectAShare()
    data = mdb.query(conn, sql)
    mdb.close(conn)
    data.index = pd.to_datetime(data['date'])
    return data

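# A short usage sketch for getDailyK, assuming the local day_k table holds
# data for the (hypothetical) pair 'sh'/'600000'. The datetime index set
# above makes rolling and resampling operations straightforward:
k = getDailyK('sh', '600000', startDate='2022-01-01', endDate='2022-12-31')
k['ma20'] = k['close'].rolling(20).mean()   # 20-day simple moving average
print(k[['close', 'ma20']].tail())
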
def persistInfo(data):
    sql = """insert into info(exchange, code, name, ipo_date, out_date, type, status)
             values('{exchange}', '{code}', '{code_name}', '{ipoDate}', {outDate}, {type}, {status})
          """
    conn = mdb.connectAShare()
    for index, row in data.iterrows():
        row['exchange'] = row['code'][:2]
        row['code'] = row['code'][3:]
        # Quote out_date when present, otherwise write SQL NULL
        row['outDate'] = '"' + row['outDate'] + '"' if len(row['outDate']) > 0 else "NULL"
        mdb.execute(conn, sql.format(**row))
    mdb.close(conn)
    print(f"{len(data)} info persisted")

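# persistInfo builds its SQL with str.format(), which is why it has to juggle
# quoting and NULLs by hand. A sketch of the same insert using qmark
# placeholders instead, matching the style persistIndex and persistDailyK
# already use (assumes mdb.executeMany binds parameters and maps None to NULL):
def persistInfoParameterized(data):
    sql = """insert into info(exchange, code, name, ipo_date, out_date, type, status)
             values(?, ?, ?, ?, ?, ?, ?)"""
    rows = []
    for index, row in data.iterrows():
        outDate = row['outDate'] if len(row['outDate']) > 0 else None
        rows.append((row['code'][:2], row['code'][3:], row['code_name'],
                     row['ipoDate'], outDate, row['type'], row['status']))
    conn = mdb.connectAShare()
    mdb.executeMany(conn, sql, rows)
    mdb.close(conn)
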
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print "got DC environment settings."
    reParams = getRedisEnvironment()
    print "got redis environment settings."

    # Authenticate on the admin db
    try:
        c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                                    user=dcParams.adminUser, password=dcParams.adminPass)
        print 'got handle'
    except:
        print "Failed to get handle under admin."

    # Authentication of the administrator
    #try:
    #    auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    #except Exception, e:
    #    print "Failed to authenticate with mongo db."
    #    print e

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to that being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect to the application db as the new user
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
        print 'Connected to the normal db: %s' %(p.db)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print 'Writing out mongo config info.'
        writeConfigFileRedis(configFile, reParams)
        print 'Writing out redis config'
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)

def main(configFile=None):
    ''' Builds the collections and indexes needed. '''
    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # Loop the collections provided and create them and their indexes
    for coll in p.collections:
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)

    mdb.close(c, dbh)

def testBuildFullArrayFlat(self):
    ''' Build a full FLATTENED array from a cursor result '''
    st = datetime.datetime.utcnow()

    # A keyword that went in yesterday creates a timeseries yesterday
    nowDt = datetime.datetime(year=2011, month=1, day=12, hour=11, minute=1, second=1)
    oneDay = datetime.timedelta(days=1)

    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=nowDt-oneDay,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Insert 2ND DOC IN THE COLLECTION
    kword.timeStamp = nowDt
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    nowDate = nowDt.replace(hour=0, minute=0, second=0, microsecond=0)

    # Last week's worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', nowDate, 168)

    # Close the connection
    mdb.close(c, dbh)

    # Inputs
    period = datetime.timedelta(days=7)
    dates, data = bl.buildFullArray(resultSet, nowDate, period, 1)

    firstDay = dates[0]
    lastDay = dates[-1]

    self.assertEquals(data.shape[0], 11520)
    self.assertEquals(firstDay, nowDate - period)
    self.assertEquals(lastDay, nowDate)

def main(configFile=None):
    ''' Builds the collections and indexes needed. '''
    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # Loop the collections provided and create them and their indexes
    for coll in p.collections:
        print "Building Collections and indexes: %s" %coll
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)

    mdb.close(c, dbh)

def testGetAllCountForOneCellLookback(self):
    ''' Gets a count for a single cell '''
    tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
    oldTweetTime = tweetTime - datetime.timedelta(seconds=15*60)
    baselineTime = datetime.datetime(2011, 1, 2, 12, 0, 0)

    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=tweetTime,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Look back over the last 24 hours
    lookback = 24
    mgrs = '38SND4595706622'
    qKeyword = 'keyword1'

    res = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs, keyword=qKeyword,
                               inDate=baselineTime, lookback=lookback)
    print res

    results = []
    for doc in res:
        print doc
        results.append(doc)

    self.assertEqual(len(results), 1)

    # Close the connection
    mdb.close(c, dbh)

def testInsertBlankDoc(self):
    ''' Checks the successful inserting of a mongo document '''
    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Clean up and drop it
    #dbh.timeseries.remove()

    # Close the connection
    mdb.close(c, dbh)

def getEvents(p):
    ''' Returns all currently active events in mongo '''
    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    try:
        docs = evCollHandle.find(fields=['objectId', 'subType', 'start', 'loc', 'radius'])
        docsOut = [d for d in docs]
    except:
        print "No documents matched the events query."
        docsOut = []
    mdb.close(c, dbh)

    # Additional fields that might be useful
    for doc in docsOut:
        # Get rid of the mongo ID
        _id = doc.pop('_id')
        if 'loc' in doc:
            # Calculate the radius in metres
            latScale, lonScale = radialToLinearUnits(float(doc['loc'][1]))
            scale = (latScale + lonScale) / 2.0
            doc['radius_m'] = int(doc['radius'] * scale)

            # Calculate the bounding box: south-west and north-east corners
            s = doc['loc'][1] - doc['radius']
            w = doc['loc'][0] - doc['radius']
            n = doc['loc'][1] + doc['radius']
            e = doc['loc'][0] + doc['radius']
            doc['bbox'] = [[w, s], [e, n]]

    return docsOut

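# radialToLinearUnits() is used above but not defined here. It is assumed to
# return the metres-per-degree scale for latitude and longitude at a given
# latitude. A hypothetical sketch under a spherical-earth approximation, not
# the project's actual implementation:
import math

def radialToLinearUnitsSketch(latitude):
    latScale = 111320.0                                     # metres per degree of latitude
    lonScale = 111320.0 * math.cos(math.radians(latitude))  # shrinks towards the poles
    return latScale, lonScale
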
def persistDailyK(data):
    sql = """insert into day_k(exchange, date, code, open, high, low, close, pre_close, volume,
                               amount, adjust_flag, turn, trade_status, pct_chg, pe_ttm, pb_mrq,
                               ps_ttm, pcf_nc_ttm, is_st)
             values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
          """
    kdata = []
    for index, row in data.iterrows():
        kdata.append((
            row['code'][:2],
            row['date'],
            row['code'][3:],
            row['open'],
            row['high'],
            row['low'],
            row['close'],
            row['preclose'],
            normalize(row['volume']),
            normalize(row['amount']),
            row['adjustflag'],
            normalize(row['turn']),
            row['tradestatus'],
            normalize(row['pctChg']),
            normalize(row['peTTM']),
            normalize(row['pbMRQ']),
            normalize(row['psTTM']),
            normalize(row['pcfNcfTTM']),
            row['isST']
        ))
    conn = mdb.connectAShare()
    # Insert in batches of 1000 rows; slicing clamps at the end of the list
    curr = 0
    while curr < len(kdata):
        mdb.executeMany(conn, sql, kdata[curr:curr+1000])
        curr += 1000
    mdb.close(conn)

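# normalize() is referenced in persistDailyK but not shown here. A plausible
# sketch, assuming its job is to turn the empty strings the upstream feed uses
# for missing values into None (stored as SQL NULL) and everything else into a
# float - a hypothetical implementation, not necessarily the project's own:
def normalizeSketch(value):
    if value is None or value == '':
        return None
    return float(value)
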
def main():
    # Should really move these to being arguments
    parser = OptionParser()
    parser.add_option("-H", "--host", dest="host")
    parser.add_option("-p", "--port", dest="port")
    parser.add_option("-d", "--db", dest="db")
    parser.add_option("-m", "--mgrs", dest="mgrs")
    parser.add_option("-M", "--mgrsprecision", dest="mgrsPrecision")
    parser.add_option("-t", "--timestamp", dest="timeStamp")
    parser.add_option("-k", "--keyword", dest="keyword")
    parser.add_option("-u", "--baselineUnit", dest="baselineUnit")
    parser.add_option("-v", "--baselineValue", dest="baselineValue")
    (options, args) = parser.parse_args()

    # Format the option inputs - these really should be arguments
    port = int(options.port)
    timeStamp = datetime.datetime.strptime(options.timeStamp, "%Y-%m-%dT%H:%M:%S")
    mgrsPrecision = int(options.mgrsPrecision)
    baselinePrecision = [options.baselineUnit, int(options.baselineValue)]

    c, dbh = mdb.getHandle(host=options.host, port=port, db=options.db)

    # Build the baseline objects as we go so that they can be updated at the end of the period
    base = baseline(options.mgrs, mgrsPrecision, options.keyword, timeStamp,
                    c=c, dbh=dbh, baselinePrecision=baselinePrecision)

    # Does the baseline document need updating?
    if base.needUpdate == True:
        # This method takes care of update and insert
        base.processBaseline(tsd.buildBlankData())

    try:
        mdb.close(c, dbh)
    except:
        pass

def main():
    ''' Builds the collections and indexes needed for the bam mongo work.
        See also /src/tests/testMdb for full tests of the base functions. '''
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    #path = 'home/dotcloud/code/config/'
    file = "mongoSetup.cfg"
    params = getConfig(path, file)

    # Get a db handle
    if params.verbose == True:
        print "Get Mongo Handle."
    c, dbh = mdb.getHandle(host=params.host, port=params.port, db=params.db)

    # Set up collections
    if params.verbose == True:
        print "Setup the mongo collections."
    mdb.setupCollections(c, dbh, params.db, params.collections, params.dropDb)

    # Get the collection handles
    timeSeriesHandle = dbh[params.timeseries]
    baselineHandle = dbh[params.baseline]
    alertsHandle = dbh[params.alerts]
    mappingHandle = dbh[params.mapping]

    # Set up the indexes on the collections
    if params.verbose == True:
        print "Setup the mongo indexes."
    setupTimeseriesIndexes(timeSeriesHandle, dropIndexes=params.dropIdx)
    setupAlertsIndexes(alertsHandle, dropIndexes=params.dropIdx)
    setupBaselineIndexes(baselineHandle, dropIndexes=params.dropIdx)

    # Close the connection
    if params.verbose == True:
        print "Closing the connection."
    mdb.close(c, dbh)

def testBuildFullArray(self):
    ''' Build a full array from a cursor result '''
    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=datetime.datetime(2011, 1, 2, 12, 1, 1),
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Modify the keyword's timestamp and insert a second doc
    kword.timeStamp = datetime.datetime(2011, 1, 1, 12, 1, 1)
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Last week's worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', datetime.datetime(2011, 1, 2), 168)

    # Inputs
    inDate = datetime.datetime(2011, 1, 2, 0, 0)
    period = datetime.timedelta(days=7)
    flat = None

    dates, data = bl.buildFullArray(resultSet, inDate, period, flat)

    self.assertEquals(len(dates), 8)
    self.assertEquals(len(data), 8)

    # Close the connection
    mdb.close(c, dbh)

def getMediaByObjectId(p, objectId):
    ''' Gets a mongo doc back based on the object ID. Called by the display page. '''
    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # The query into mongo that should only return 1 doc
    query = {'objectId': objectId}
    doc = evCollHandle.find(query)
    try:
        doc = [d for d in doc][0]
    except:
        print "No document matched your query. Object ID: %s." % objectId
        doc = None

    mdb.close(c, dbh)
    return doc

def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    logging.info("Mongo Params:\n%s\n%s\n%s\n%s" %(dcParams.mongoHost, dcParams.mongoPort,
                                                   dcParams.adminUser, dcParams.adminPass))

    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                               user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to that being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect to the application db as the new user
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)

def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()

    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                               user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to that being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect to the application db as the new user
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print "----- writing out new config parameters."
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)

def main(configFile=None):
    ''' Coordinates the management functions. Command line called, typically from a CRON. '''
    # Get the config file
    p = getConfigParameters(configFile)

    # Logging config
    logFile = os.path.join(p.errorPath, p.errorFile)
    logging.basicConfig(filename=logFile, format='%(levelname)s:: \t%(asctime)s %(message)s',
                        level=p.logLevel)

    # Streaming client
    connClientPath = os.path.dirname(p.errorPath)
    p.streamClient = os.path.join(connClientPath, 'src/connectionClient.py')

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
        mgmtCollHandle = dbh[p.mgmtCollection]
        logging.debug("Connected and authenticated on the db.")
    except:
        logging.critical('Failed to connect to db and authenticate.', exc_info=True)
        sys.exit()

    # Create a new management document if needed
    initialOID = setInitialPid(mgmtCollHandle)

    # Get the current events from crowded
    crowdedEvents = getCrowdedEvents(p)

    # Get the events currently stored by this app
    myEvents = getLocalEvents(p, evCollHandle)

    # Compare the 2 sets of events: what's old and new?
    oldEvents, newEvents = checkEvents(crowdedEvents, myEvents)

    # Expire old events from the db, so that the new stream reflects the correct interest
    for oldEvent in oldEvents:
        print oldEvent
        logging.debug('Expiring Old Event in DB: %s' % (oldEvent))
        res = expireOldEvent(evCollHandle, oldEvent)

    # Create a new item in the db for each new event
    for newEvent in newEvents:
        logging.debug('Creating New Event in DB: %s' % (newEvent))
        res = createLocalEvent(evCollHandle, newEvent)

    # Get the old process ID and kill it off
    pid = getPid(mgmtCollHandle)
    logging.debug('Current PID: %s' % (pid))

    # Only continue if there is a change in the events
    if len(oldEvents) > 0 or len(newEvents) > 0:
        if pid:
            logging.debug('Killing old process with ID: %s' % (pid))
            res = killOldProcess(pid)

        # Now create the new one
        newPid = processNewEvent(p)
        logging.debug('Creating a new process with PID: %s' % (newPid))

        # Update the current process id in mongo
        res = storePid(mgmtCollHandle, newPid)
        logging.debug('Stored the new PID: %s' % (res))

    mdb.close(c, dbh)
    logging.shutdown()

def main(timeStamp=None):
    ''' Builds geoJson for the currently active cells and publishes/stores it. '''
    print 'in main'

    # Get the config params into an object
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    file = "periodicGeoAlert.cfg"
    params = getConfig(path, file)

    # Make the JMS connection via STOMP and the jmsCode class
    if params.publishJms:
        jms = jmsCode.jmsHandler(params.jHost, params.jPort, verbose=params.verbose)
        jms.connect()

    # Instantiate the mgrs lib
    m = mgrsLib.MGRS()

    # Time Variables
    if not timeStamp:
        now = datetime.datetime.utcnow()
    else:
        now = timeStamp
    nowMinute = getThisMinute(now)

    # Connect and get handle
    c, dbh = mdb.getHandle(params.mHost, params.mPort, params.mDb)

    # Assign collection handles to variables for easier passing
    baseCollHandle = dbh[params.baseColl]
    tsCollHandle = dbh[params.tsColl]
    mapCollHandle = dbh[params.cellColl]

    # Retrieve the active cells
    activeCells = getActiveCells(baseCollHandle, timeStamp=now, lookback=params.lookback,
                                 mgrsPrecision=params.mgrsPrecision)

    fxx = open(path + '/outGeoJson.gjsn', 'w')

    # Loop those active cells
    for activeCell in activeCells:
        kywd = activeCell['keyword']
        mgrs = activeCell['mgrs']
        print mgrs

        # The period for this count value
        duration = datetime.timedelta(seconds=params.lookback)
        print 'duration', duration

        # The coordinates of the polygon to be mapped from MGRS
        coords = buildPolygon(m, mgrs, params.mgrsPrecision)
        print 'coords: ', coords

        # The total count value for this mgrs/keyword/mgrsPrecision
        count = getCountsForActiveCells(tsCollHandle, nowMinute, params.lookback,
                                        mgrs, params.mgrsPrecision, kywd)
        print 'count: %s' %count

        # ANOMALY: Get a list of metrics that indicated it was anomalous
        #anomalies = checkForAnomalies(activeCell, count)
        anomalies = None

        # A geoJson object representing all of this information
        geoJson = buildGeoJson(kywd, coords, mgrs, params.mgrsPrecision, now, duration, count, anomalies)

        # ANOMALY: If it was anomalous, push the geoJson to JMS
        if params.publishJms == True:
            jms.sendData(params.jDestination, geoJson)
        fxx.write(geoJson + '\n')

        # Insert the geoJson into the mapping collection
        if params.storeCell == True:
            success = insertGeoJson(mapCollHandle, reformatGeoJsonTime(geoJson))
            print 'success: %s' %success

    #jms.disConnect()
    mdb.close(c, dbh)
    fxx.close()

def testProcessBaselineLast30Days(self):
    ''' Checks accurate population of an array for 30 day all '''
    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Set up some times to work with
    tweetTime = datetime.datetime.utcnow()
    thisMinute = tweetTime.replace(second=0, microsecond=0)
    today = tweetTime.replace(hour=0, minute=0, second=0, microsecond=0)
    # Thirty days ago - at the start of the day
    lastMonthTweet = tweetTime - datetime.timedelta(days=30)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=lastMonthTweet,
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664, userID=4444, source='twitter')

    # Insert a new timeseries object for the tweet 30 days ago
    ts = timeSeries()
    ts.importData(testKywd)
    success = ts.insertBlankDoc()
    ts.updateCount()

    # Create a keyword object an hour after the first
    testKywd2 = kw(keyword='keyword1',
                   timeStamp=lastMonthTweet + datetime.timedelta(hours=1),
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')

    # Insert the second keyword
    ts2 = timeSeries()
    ts2.importData(testKywd2)
    success = ts2.insertBlankDoc()
    ts2.updateCount()

    # Create a keyword object for the current tweet (note: aliases testKywd)
    testKywd3 = testKywd
    testKywd3.timeStamp = tweetTime

    # Instantiate the baseline object/class
    base = bl.baseline(kywd=testKywd3, cellBuildPeriod=600)
    if base.needUpdate == True:
        if not base.lastBaselined():
            doc = base.buildDoc()
            bl.insertBaselineDoc(dbh, doc)

    # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
    ts3 = timeSeries()
    ts3.importData(testKywd3)
    success = ts3.insertBlankDoc()
    ts3.updateCount()

    tweetTimeMinus2Days = tweetTime - datetime.timedelta(days=2)

    # Create a new keyword object to test the daily slicing
    testKywd5 = kw(keyword='keyword1',
                   timeStamp=tweetTimeMinus2Days,
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')

    # Insert it - NOTE HOW THIS IS AFTER THE BASELINE BUILD
    ts5 = timeSeries()
    ts5.importData(testKywd5)
    success = ts5.insertBlankDoc()
    ts5.updateCount()

    # Process Baseline
    base.processBaseline()

    # Get back the 30 day array
    arr = base.test30DayArray

    # Calculate what the array length should be
    soFarToday = (thisMinute - today).seconds/60.0
    # The start of the array datetime
    lastMonthDay = lastMonthTweet.replace(hour=0, minute=0, second=0, microsecond=0)
    # The number of days between today and the start of the array (then in minutes)
    dateDiff = (today - lastMonthDay)
    minsDiff = dateDiff.days*1440 + dateDiff.seconds/60.0
    total = minsDiff + soFarToday

    # Confirm it's the right length
    self.assertEqual(total, len(arr))

    # Get the minutes for the first 2 keywords (the third shouldn't be there)
    kwd1Min = int((testKywd.timeStamp - lastMonthDay).seconds/60)
    kwd2Min = int((testKywd2.timeStamp - lastMonthDay).seconds/60)
    kwd1Test = [arr[kwd1Min-1], arr[kwd1Min], arr[kwd1Min+1]]
    kwd2Test = [arr[kwd2Min-1], arr[kwd2Min], arr[kwd2Min+1]]

    # Debug: print any non-zero minutes
    for j, val in enumerate(arr):
        if val > 0:
            print j, val

    self.assertEquals(kwd1Test, [0, 1, 0])
    self.assertEquals(kwd2Test, [0, 1, 0])

    # 30 DAY TIME SLICE CHECK - weekly
    arr = base.test30DaySliced
    testSliced = int(30/7) * 6 * 60
    self.assertEquals(testSliced, len(arr))

    # 7 DAY ALL CHECK
    arr7Day = base.test7DayArray
    test7DayAll = (thisMinute - today).seconds/60.0 + 1440*7
    self.assertEquals(len(arr7Day), int(test7DayAll))

    # 30 HOUR CHECK
    arr30Hrs = base.test30hrArray
    test30Hours = 30*60
    self.assertEquals(len(arr30Hrs), int(test30Hours))

    # Close the connection
    mdb.close(c, dbh)

def getIndex(type):
    conn = mdb.connectAShare()
    data = mdb.query(conn, f"select * from idx where type = '{type}'")
    mdb.close(conn)
    return data

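# A round-trip sketch for the idx helpers above. fetchIndexConstituents() is a
# hypothetical upstream fetch returning a DataFrame with 'code' (e.g.
# 'sh.000300'), 'code_name' and 'updateDate' columns, as persistIndex expects:
constituents = fetchIndexConstituents('hs300')   # hypothetical
deleteIndex('hs300')                             # drop the stale rows for this type
persistIndex('hs300', constituents)              # bulk insert the fresh rows
hs300 = getIndex('hs300')                        # read them back as a DataFrame
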
def main():
    ''' Script to build tweet objects from the VAST dataset and place them on
        a Queue and/or JMS for testing purposes.

        LIKELY SPEED IMPROVEMENTS:
        - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
        - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
        - ANY DUPLICATION CHECKS? '''

    db = 'bam'
    host = 'localhost'
    port = 27017

    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)

    blUnits = 'minute'
    blPrecision = 10
    baselineParameters = [blUnits, blPrecision]
    mgrsPrecision = 2
    #dripRate = 1.5

    # JMS destination
    #destination = '/topic/test.vasttweets'
    #hostIn = 'localhost'
    #portIn = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)   # Set up collections
    dbh = mdb.setupIndexes(dbh)

    # Make the JMS connection via STOMP and the jmsCode class
    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    #jms.connect()

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName = "MicroblogsSample.csv"
    fName = "MicroblogsOrdered.csv"

    tweetStats = 'tweetStatsFile_50000.csv'
    tptFile = open(path+tweetStats, 'w')

    # The script used to generate the baseline
    baselinePath = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/src/scripts/'
    baselineScript = 'subprocessBaseline.py'
    scriptFile = os.path.join(baselinePath, baselineScript)

    f = retrieveFile(path, fName)
    x = 0

    # Start time
    earliestTweet = datetime.datetime(2011, 4, 30, 0, 0)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime

    # This speeds things up from seconds to minutes
    speedUpRate = 1000

    # Build a blank timeseries array to save it being built every time
    blankData = buildBlankData(hours=24)

    # Loop the lines and build tweet objects
    for line in f.readlines():
        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')

        # Skip the header line
        if x == 0:
            x += 1
            continue
        if x % 100 == 0:
            print "processed: ", x
        if x > 100000:
            print line
            break

        line = line.split(',')
        tweetProcessStart = datetime.datetime.utcnow()
        tweetId, dt, latLon, text = line

        # Get the geos
        geos = getGeos(tweetId, latLon)
        if not geos:
            print "skipping this record - bad or no geos"
            continue

        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)
        if not dtg:
            print "skipping this record - bad or no time"
            continue

        # Get the tweet time into seconds from UNIX
        tweetTime = time.mktime(time.struct_time(dtg.timetuple()))

        # Get the tweet time in seconds since the last tweet
        sinceLastTweet = tweetTime - lastTweetTime
        #delay = sinceLastTweet / speedUpRate
        # Apply a scaling to it
        #time.sleep(delay)

        # Assign this tweet time to be the last tweet time
        lastTweetTime = tweetTime

        # Build a tweet object
        twt = vastTweet()
        twt.importData(timeStamp=dtg, lat=geos[0], lon=geos[1], text=text, tweetId=tweetId)

        #----------------------------------------------------------------------------------
        # PROCESS INTO KEYWORDS

        # Build into keywords - skipping a step for development
        kywd = processTweet(twt, mgrsPrecision)

        # Add keywords to the list based on hashtags
        kywd.fromHashTag()
        # Add keywords to the list based on name lookup
        kywd.fromLookup()

        if len(kywd.keywords) == 0:
            pass    #print "No matches: ", twt.text

        xx = 0
        # Now loop the resultant keywords
        for kwObj in kywd.keywords:
            xx += 1
            #print "------------------"
            #print kwObj.keyword
            #print kwObj.text

            # Pass the keyword object into the timeseries class
            #ts = timeSeries(host='localhost', port=27017, db='bam')
            ts = timeSeries(c=c, dbh=dbh)
            ts.importData(kwObj, blockPrecision=24)
            success = ts.insertDoc(blankData=blankData, incrementBy=100)

            callBaseliner(scriptFile, host, port, db, kwObj, baselineParameters, mac=1)

            # METRICS - currently about 0.05 seconds per tweet
            tweetProcessStop = datetime.datetime.utcnow()
            tweetProcessTimes += (tweetProcessStop - tweetProcessStart)
            processDif = (tweetProcessStop - tweetProcessStart)
            tptFile.write(str(x)+","+str(xx)+","+str(processDif.seconds + processDif.microseconds/1000000.)+"\n")

        #----------------------------------------------------------------------------------
        # SEND TO JMS WITH THIS CODE
        # Convert it into a JSON object and push it to the JMS
        #jTwt = twt.vastTweet2Json()
        #jms.sendData(destination, jTwt, x)
        #----------------------------------------------------------------------------------

        x += 1
        #time.sleep(dripRate)

    # Disconnect from the JMS
    #jms.disConnect()

    end = datetime.datetime.utcnow()
    dif = end - start

    print "Total Tweet Process Time: %s" %tweetProcessTimes.seconds
    print "Average Tweet process time: %s" %(float(tweetProcessTimes.seconds)/float(x))
    print "Tweets Processed: %s" %x
    print "Total Process Time: %s" %(dif)

    # Close the mongo connection
    mdb.close(c, dbh)

    f.close()
    tptFile.close()

def buildSubscription(event):
    ''' Builds a new subscription based on a GET-called event '''
    # Placeholder for doing this by users/algorithm?
    user = '******'

    cwd = os.getcwd()
    cfgs = os.path.join(cwd, 'config/crowded.cfg')
    p = getConfigParameters(cfgs)
    #print "Config Filepath in buildSubscription: ", cfgs

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    subsCollHandle = dbh[p.subsCollection]
    evCollHandle = dbh[p.eventsCollection]

    # Check whether we definitely need a new subscription or not
    checked = checkForExistingSubs(p, subsCollHandle, event)

    # If the subscription doesn't already exist
    if checked['exists'] == False:
        # Get the client and secret keys
        api = InstagramAPI(client_id=p.client, client_secret=p.secret)

        # If it's a geo-based subscription
        if event['object'] == 'geography':
            res = api.create_subscription(object='geography', lat=event['lat'], lng=event['lon'],
                                          radius=event['radius'], aspect='media',
                                          callback_url=p.subBaseUrl)
            print "Geo Subscription setup: %s" %res

        # A tag-based subscription
        elif event['object'] == 'tag':
            res = api.create_subscription(object='tag', object_id=event['tag'], aspect='media',
                                          callback_url=p.subBaseUrl)
            print "Tag Subscription setup: %s" %res

        # Just in case
        else:
            print "Didn't setup a subscription"
            res = None

        # Update the subscription collection
        if res and res['meta']['code'] == 200:
            data = res['data']
            subType = data['object']
            objectId = data['object_id']
            subId = data['id']
            aspect = data['aspect']
            success = updateSubs(subsCollHandle, subType, subId, objectId, aspect, event, user)

            # Build the response
            response = {'success': True,
                        'objectId': objectId,
                        'object': subType,
                        'url': "%s/%s" %(p.baseUrl, success)}

            # Insert a blank document to populate
            _id = buildEventPlaceholder(evCollHandle, subType, event, objectId)

        # Something failed in the subscription build...?
        else:
            print '='*40
            print 'Failed here. No event placeholder or subscription updated.'
            print res
            print '='*40
            response = {'success': False,
                        'objectId': checked['objectId'],
                        'object': checked['object'],
                        'url': "%s/%s" %(p.baseUrl, checked['objectId'])}

    # A valid subscription already exists
    elif checked['exists'] == True:
        response = {'success': True,
                    'objectId': checked['objectId'],
                    'object': checked['object'],
                    'url': "%s/%s" %(p.baseUrl, checked['objectId'])}

    # Close the connection/handle
    mdb.close(c, dbh)
    return response

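# A hypothetical call into buildSubscription(), assuming the GET handler has
# already unpacked its query string into a dict with the field names the
# checks above expect:
event = {'object': 'geography', 'lat': 51.5074, 'lon': -0.1278, 'radius': 1000}
response = buildSubscription(event)
print response['url']   # where the caller can poll for the event's media
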
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''
    p = getConfigParameters(configFile)

    # Get a db handle
    if p.verbose == True:
        print "---- Getting Mongo Handle."
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db)

    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e

    # Loop the collections provided and create them and their indexes
    for coll in p.collections:
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)

    mdb.close(c, dbh)

if __name__ == "__main__":
    # Command line arguments
    if len(sys.argv) < 2:
        print 'No config file provided. Exiting.'
        sys.exit()
    configFile = sys.argv[1]    # first argument is the config file path
    main(configFile)