Ejemplo n.º 1
0
    def testUpdateDocument(self):
        ''' Increment a specific hour.minute slot in a timeseries document.

        Inserts a blank document for self.kw (blockPrecision=24), calls
        updateCount() and then checks the mgrs value of every document whose
        'data.12.1' slot equals 1.
        '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # New timeseries object with data
            ts = timeSeries()
            ts.importData(self.kw, blockPrecision=24)

            success = ts.insertBlankDoc()
            self.assertEqual(success, 1)

            # Update/increment a specific hour.minute
            ts.updateCount()

            # Run a query for this item
            outDocs = dbh.timeseries.find({'data.12.1': 1})

            # NOTE(review): when the query matches nothing this loop never
            # runs and the test passes without checking the increment -
            # consider asserting that at least one document came back.
            for doc in outDocs:
                print(doc)
                self.assertEqual(doc['mgrs'], '38SND4595706622')
        finally:
            # Close the connection even when an assertion above fails
            mdb.close(c, dbh)
Ejemplo n.º 2
0
    def MongoLookup(self):
        ''' Check whether a timeseries document already exists for this period.

        mongoLookup() is expected to return 0 before the blank document is
        inserted and 1 afterwards.

        NOTE(review): the method name lacks the 'test' prefix, so default
        unittest discovery will not collect it - confirm that is deliberate.
        '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # New timeseries object with data
            ts = timeSeries()
            ts.importData(self.kw, blockPrecision=1)

            # Check the count - should be 0 before the doc gets inserted
            self.assertEqual(ts.mongoLookup(), 0)

            # Build and insert a new mongo formatted document
            ts.insertBlankDoc()

            # Count should be 1 now that the document has been inserted
            self.assertEqual(ts.mongoLookup(), 1)
        finally:
            # Close the connection even when an assertion above fails
            mdb.close(c, dbh)
Ejemplo n.º 3
0
    def testBuildFullArrayFlat(self):
        ''' Build a full FLATTENED array from a cursor result.

        Two timeseries documents are inserted (one for 2011-01-11 and one for
        2011-01-12), a 168 hour lookback is queried and buildFullArray() is
        called with flat=1. The flattened data is expected to have 11520
        entries (8 * 1440) and the dates to run from nowDate - 7 days to
        nowDate.
        '''

        # A keyword that went in yesterday creates a timeseries yesterday
        nowDt = datetime.datetime(year=2011, month=1, day=12, hour=11, minute=1, second=1)
        oneDay = datetime.timedelta(days=1)
        nowDate = nowDt.replace(hour=0, minute=0, second=0, microsecond=0)
        period = datetime.timedelta(days=7)

        # Get a db handle
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Build a keyword
            kword = kw(keyword='keyword1', timeStamp=nowDt - oneDay, lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')

            # New timeseries object
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Insert 2ND DOC IN THE COLLECTION
            kword.timeStamp = nowDt
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Last 1 weeks worth of documents
            resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', nowDate, 168)

            # Consume the result set while the connection is still open; the
            # connection is only released in the finally clause below.
            dates, data = bl.buildFullArray(resultSet, nowDate, period, 1)
        finally:
            # Close the connection even when something above fails
            mdb.close(c, dbh)

        firstDay = dates[0]
        lastDay = dates[-1]

        self.assertEqual(data.shape[0], 11520)
        self.assertEqual(firstDay, nowDate - period)
        self.assertEqual(lastDay, nowDate)
Ejemplo n.º 4
0
    def testGetAllCountForOneCell(self):
        ''' Gets a count for a single cell.

        Inserts timeseries documents for two keyword1 tweets eleven minutes
        apart (11:54:15 and 12:05:15 on 2011-01-02) and expects
        getResultsPerCell() to return exactly one result for the cell.
        '''

        tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
        oldTweetTime = tweetTime - datetime.timedelta(seconds=11 * 60)

        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Build a keyword to represent the baseline
            kword = kw(keyword='keyword1', timeStamp=oldTweetTime, lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')

            # New timeseries object
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Build a keyword
            kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')

            # New timeseries object
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # ALL DOCUMENTS
            mgrs = '38SND4595706622'
            keyword = 'keyword1'

            # This indate represents when the baseline was run (12:10) minus
            # the interest period (10 minutes)
            inDate = datetime.datetime(2011, 1, 2, 12, 0, 0)
            results = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs,
                                           keyword=keyword, inDate=inDate)

            self.assertEqual(len(results), 1)
        finally:
            # The original never released the connection; close it here so
            # it is freed whether or not the assertion passes.
            mdb.close(c, dbh)
Ejemplo n.º 5
0
    def testBuildFullArray(self):
        ''' Build a full array from a cursor result.

        Inserts timeseries documents for 2011-01-02 and 2011-01-01, queries a
        168 hour lookback and calls buildFullArray() with flat=None. Both the
        dates and the data are expected to contain 8 entries.
        '''

        # Get a db handle
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Build a keyword
            kword = kw(keyword='keyword1', timeStamp=datetime.datetime(2011, 1, 2, 12, 1, 1),
                       lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')

            # New timeseries object
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Re-use the keyword with a timestamp one day earlier and insert
            # a second document for it
            kword.timeStamp = datetime.datetime(2011, 1, 1, 12, 1, 1)
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Last 1 weeks worth of documents
            resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1',
                                             datetime.datetime(2011, 1, 2), 168)

            # Inputs
            inDate = datetime.datetime(2011, 1, 2, 0, 0)
            period = datetime.timedelta(days=7)
            flat = None

            dates, data = bl.buildFullArray(resultSet, inDate, period, flat)

            self.assertEqual(len(dates), 8)
            self.assertEqual(len(data), 8)
        finally:
            # Close the connection even when an assertion above fails
            mdb.close(c, dbh)
Ejemplo n.º 6
0
 def testImportData(self):
     ''' Check import of keyword object.

     After importData(self.kw) the timeseries object is expected to expose
     the start datetime, source, period and mgrs values asserted below.
     '''

     # New timeseries object
     ts = timeSeries()
     try:
         # Import the data
         ts.importData(self.kw)

         # Same output as the old print statement, valid on Python 2 and 3
         print('start %s' % (ts.start,))
         self.assertEqual(ts.start, datetime.datetime(2011, 1, 2, 0, 0, 0))
         self.assertEqual(ts.source, 'twitter')
         self.assertEqual(ts.period, datetime.timedelta(hours=24))
         self.assertEqual(ts.mgrs, '38SND4595706622')
     finally:
         # Close the connection even when an assertion above fails
         ts.c.disconnect()
Ejemplo n.º 7
0
    def testGetAllCountForOneCellLookback(self):
        ''' Gets a count for a single cell using an explicit lookback.

        Inserts one timeseries document for a keyword1 tweet at 12:05:15 on
        2011-01-02 and expects getResultsPerCell(), called with inDate 12:00
        and lookback=24, to yield exactly one document.
        '''

        tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
        baselineTime = datetime.datetime(2011, 1, 2, 12, 0, 0)

        # Get a db handle
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Build a keyword
            kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')

            # New timeseries object
            ts = timeSeries()
            ts.importData(kword)
            ts.insertBlankDoc()

            # Query the cell with a lookback of 24
            lookback = 24
            mgrs = '38SND4595706622'
            qKeyword = 'keyword1'
            res = bl.getResultsPerCell(dbh,
                                       collection='timeseries',
                                       mgrs=mgrs,
                                       keyword=qKeyword,
                                       inDate=baselineTime,
                                       lookback=lookback)
            print(res)

            # Walk the result once, echoing and collecting each document
            results = []
            for doc in res:
                print(doc)
                results.append(doc)

            self.assertEqual(len(results), 1)
        finally:
            # Close the connection even when the assertion above fails
            mdb.close(c, dbh)
Ejemplo n.º 8
0
    def InsertBlankDoc(self):
        ''' Checks the successful inserting of a mongo document.

        insertBlankDoc() is expected to return 1 for self.kw imported with
        blockPrecision=1.

        NOTE(review): the method name lacks the 'test' prefix, so default
        unittest discovery will not collect it - confirm that is deliberate.
        '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        try:
            # Set up collections (called with dropCollections=True)
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # New timeseries object with data
            ts = timeSeries()
            ts.importData(self.kw, blockPrecision=1)

            # Build and insert a new mongo formatted document
            success = ts.insertBlankDoc()
            self.assertEqual(success, 1)
        finally:
            # Close the connection even when the assertion above fails
            mdb.close(c, dbh)
Ejemplo n.º 9
0
    def testGetStart(self):
        ''' Checks getting the start dt.

        For a fixed input time of 07:00 on 2011-01-01, getStart() is called
        with each block precision and its result compared with the expected
        block start.
        '''

        inTime = datetime.datetime(2011, 1, 1, 7)

        # (blockPrecision, expected start) pairs - the precision is presumably
        # a block length in hours; confirm against timeSeries.getStart
        blockPrecisions = [
            (1,  datetime.datetime(2011, 1, 1, 7)),
            (4,  datetime.datetime(2011, 1, 1, 4)),
            (6,  datetime.datetime(2011, 1, 1, 6)),
            (12, datetime.datetime(2011, 1, 1, 0)),
            (24, datetime.datetime(2011, 1, 1, 0)),
        ]

        # Test the input block precision against the output dt
        for precision, expectedStart in blockPrecisions:
            ts = timeSeries()
            try:
                start = ts.getStart(timeStamp=inTime, blockPrecision=precision)
                self.assertEqual(start, expectedStart)
            finally:
                # Disconnect even when the assertion fails, so a failing case
                # does not leave this iteration's connection open
                ts.c.disconnect()
Ejemplo n.º 10
0
    def testProcessBaselineLast30Days(self):
        ''' Checks accurate population of an array for 30 day all.

        Timeseries documents are inserted for tweets 30 days ago, 30 days ago
        plus one hour, now, and two days ago. A baseline is built for the
        current tweet and processBaseline() is run; the lengths of the 30 day,
        30 day sliced, 7 day and 30 hour arrays exposed on the baseline object
        are then checked, along with the counts around the two oldest tweets.
        '''

        def makeKeyword(stamp):
            # Every keyword in this test differs only by its timestamp
            return kw(keyword='keyword1',
                      timeStamp=stamp,
                      lat=34.4, lon=45.5,
                      text='this text contained the hashtag #keyword1',
                      tweetID=346664, userID=4444, source='twitter')

        def insertAndCount(kword):
            # Insert a blank timeseries document for kword, then increment it
            series = timeSeries()
            series.importData(kword)
            series.insertBlankDoc()
            series.updateCount()

        def minutesSince(start, stamp):
            # Whole minutes between start and stamp. total_seconds() keeps the
            # days component, which timedelta.seconds silently drops.
            return int((stamp - start).total_seconds() // 60)

        # Connect and get handle
        c, dbh = mdb.getHandle()
        try:
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Set up some times to work with
            tweetTime = datetime.datetime.utcnow()
            thisMinute = tweetTime.replace(second=0, microsecond=0)
            today = tweetTime.replace(hour=0, minute=0, second=0, microsecond=0)

            # Thirty days ago, at the same time of day as the current tweet
            lastMonthTweet = tweetTime - datetime.timedelta(days=30)
            lastMonthPlusHour = lastMonthTweet + datetime.timedelta(hours=1)

            # Timeseries for the tweet 30 days ago
            testKywd = makeKeyword(lastMonthTweet)
            insertAndCount(testKywd)

            # Timeseries for a tweet one hour after that
            testKywd2 = makeKeyword(lastMonthPlusHour)
            insertAndCount(testKywd2)

            # Keyword for the current tweet. This re-uses (and mutates) the
            # first keyword object, exactly as before, so testKywd.timeStamp
            # must not be read after this point - the original timestamps are
            # kept in lastMonthTweet / lastMonthPlusHour instead.
            testKywd3 = testKywd
            testKywd3.timeStamp = tweetTime

            # Instantiate the baseline object/class
            base = bl.baseline(kywd=testKywd3, cellBuildPeriod=600)
            if base.needUpdate == True:
                if not base.lastBaselined():
                    doc = base.buildDoc()
                    bl.insertBaselineDoc(dbh, doc)

            # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
            insertAndCount(testKywd3)

            # A further keyword, two days old, to test the daily slicing
            tweetTimeMinus2Days = tweetTime - datetime.timedelta(days=2)
            testKywd5 = makeKeyword(tweetTimeMinus2Days)
            insertAndCount(testKywd5)

            # Process Baseline
            base.processBaseline()

            # Get back the 30 day array
            arr = base.test30DayArray

            # Calculate what the array length should be
            soFarToday = (thisMinute - today).seconds / 60.0

            # The start of the array datetime
            lastMonthDay = lastMonthTweet.replace(hour=0, minute=0, second=0, microsecond=0)

            # The number of days between today and the start of the array (then in minutes)
            dateDiff = (today - lastMonthDay)
            minsDiff = dateDiff.days * 1440 + dateDiff.seconds / 60.0
            total = minsDiff + soFarToday

            # Confirm its the right length
            self.assertEqual(total, len(arr))

            # Minute offsets of the first 2 keywords from the array start (the
            # third shouldn't be there). The second offset can exceed one day
            # when the +1 hour tweet crosses midnight.
            kwd1Min = minutesSince(lastMonthDay, lastMonthTweet)
            kwd2Min = minutesSince(lastMonthDay, lastMonthPlusHour)

            # NOTE(review): when this runs during the first minute after
            # midnight UTC, kwd1Min is 0 and kwd1Min-1 wraps to the last
            # element of the array.
            kwd1Test = [arr[kwd1Min - 1], arr[kwd1Min], arr[kwd1Min + 1]]
            kwd2Test = [arr[kwd2Min - 1], arr[kwd2Min], arr[kwd2Min + 1]]

            # Debug output: index and value of every populated minute
            for minute, count in enumerate(arr):
                if count > 0:
                    print('%s %s' % (minute, count))

            self.assertEqual(kwd1Test, [0, 1, 0])
            self.assertEqual(kwd2Test, [0, 1, 0])

            # 30 DAY TIME SLICE CHECK
            arr = base.test30DaySliced
            # weekly
            testSliced = (30 // 7) * 6 * 60
            self.assertEqual(testSliced, len(arr))

            arr7Day = base.test7DayArray
            test7DayAll = (thisMinute - today).seconds / 60.0 + 1440 * 7
            self.assertEqual(len(arr7Day), int(test7DayAll))

            arr30Hrs = base.test30hrArray
            test30Hours = 30 * 60
            self.assertEqual(len(arr30Hrs), int(test30Hours))
        finally:
            # Close the connection even when an assertion above fails
            mdb.close(c, dbh)