Example #1
    def testUpdateDocument(self):
        ''' Function updates/increments a specific hour.minute in a document.   '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections        
       
        # New timeseries object with data
        ts = timeSeries()
        ts.importData(self.kw, blockPrecision=24)

        success = ts.insertBlankDoc()
        self.assertEquals(success, 1)

        # Update/increment a specific hour.minute
        ts.updateCount()

        # Run a query for this item
        outDocs = dbh.timeseries.find({'data.12.1':1})

        for doc in outDocs:
            print doc
            self.assertEquals(doc['mgrs'], '38SND4595706622')

        # Close the connection
        mdb.close(c, dbh)
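The `{'data.12.1': 1}` query works because each timeseries document pre-allocates a nested `data.<hour>.<minute>` counter structure. Below is a minimal sketch of the equivalent raw pymongo update, assuming that document shape and the `bam`/`timeseries` names used elsewhere on this page (the field layout is inferred from the query above, not confirmed internals):

import pymongo

# Sketch: increment one minute bucket in a pre-allocated document.
client = pymongo.MongoClient('localhost', 27017)
coll = client['bam']['timeseries']

def update_count(mgrs, keyword, when):
    # e.g. 12:01 increments the field 'data.12.1'
    field = 'data.%d.%d' % (when.hour, when.minute)
    coll.update_one({'mgrs': mgrs, 'keyword': keyword},
                    {'$inc': {field: 1}})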
Example #2
    def testInsertBaselineDoc(self):
        ''' Inserts a completed baseline document into the baseline collection.'''
        
        # Connect and get handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)

        # Build a keyword object
        testKywd = kw(keyword='keyword1',
                   timeStamp=datetime.datetime(2011,6,22,12,10,45),
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')
        
        # Instantiate the baseline object/class
        baseLine = bl.baseline(kywd=testKywd,cellBuildPeriod=600)

        # Build the document and insert it
        doc = baseLine.buildDoc()
        bl.insertBaselineDoc(dbh, doc)
        
        res = dbh.baseline.find()[0]
        print res
        
        self.assertEquals(res['keyword'], 'keyword1')
        self.assertEquals(res['mgrs'], '38SND4595706622')
        self.assertEquals(res['mgrsPrecision'], 10)

        # Close the connection
        mdb.close(c, dbh)
Example #3
    def MongoLookup(self):
        ''' Checks whether a timeseries document already exists for this period. '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections        
        
        # New timeseries object with data
        ts = timeSeries()
        ts.importData(self.kw, blockPrecision=1)

        # Check the count - should be 0 before the doc gets inserted
        count = ts.mongoLookup()
        self.assertEquals(count, 0)
        
        # Build and insert a new mongo formatted document
        success = ts.insertBlankDoc()
        
        # Count should be 1 now that the document has been inserted
        count = ts.mongoLookup()
        self.assertEquals(count, 1)
        
        # Clean up, remove the content and close the connection
        #dbh.timeseries.remove()
        mdb.close(c, dbh)
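`mongoLookup` itself isn't reproduced on this page; from its use here it is just a count of matching timeseries documents. A hedged sketch of an equivalent query (the filter fields are assumptions drawn from the other examples):

def mongo_lookup(dbh, mgrs, keyword):
    # Hypothetical equivalent of ts.mongoLookup(): count the timeseries
    # documents that already exist for this cell/keyword combination.
    return dbh.timeseries.find({'mgrs': mgrs, 'keyword': keyword}).count()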
Example #4
def main():
    
    # Config file parameters
    pathIn = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/'
    fileIn = 'preAllocateTimeSeries.cfg'
    
    # Get parameters from config
    p = params(pathIn, fileIn)
    
    # Connect and get db and collection handle
    c, dbh = mdb.getHandle(p.host, p.port, p.db)
    collectionHandle = dbh[p.coll]

    # Current datetime
    #today = datetime.datetime.utcnow().replace(hour=0, minute=0,second=0,microsecond=0)
    today = datetime.datetime(2011,5,1)
    
    # Build some blank data
    blankDataArr = buildBlankData()
    
    # A list to hold the timeseries we need to pre-allocate for
    preAllocate = []
    # Get pairs to be pre-allocated from yesterday - lookback is in days
    if 'yesterday' in p.baselineTypes:
        preAllocate, minCount, maxCount = getCommonMgrsAndKeyword(collectionHandle, p.mgrsPrecision, p.nothingFoundKeyword, today, lookback=1)
    
    # Now loop the keyword/mgrs pairs and build new timeseries documents for today
    for item in preAllocate:
        response = insertDoc(collectionHandle, item['mgrs'], p.mgrsPrecision, item['keyword'], 'twitter', today, buildBlankData())
        
    mdb.close(c, dbh)
Example #5
def deleteIndex(type):
    sql = f"delete from idx where type = '{type}'"

    conn = mdb.connectAShare()
    mdb.execute(conn, sql)
    print(f"{type} deleted")
    mdb.close(conn)
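Building the statement with an f-string leaves the quoting to you and is injection-prone if `type` ever comes from outside the program. Here is a sketch of the parameterized alternative against a plain DB-API connection (sqlite3 and the file/index names are stand-ins for whatever `mdb.connectAShare()` returns; `persistIndex` below already uses qmark placeholders this way):

import sqlite3

def delete_index_safe(conn, idx_type):
    # The driver substitutes and quotes idx_type itself, so a malicious
    # value cannot break out of the statement.
    conn.execute("delete from idx where type = ?", (idx_type,))
    conn.commit()

# Usage, with a hypothetical database file and index type:
conn = sqlite3.connect('ashare.db')
delete_index_safe(conn, 'sz50')
conn.close()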
Example #6
def getInfoFromDb(exchange, code):
    sql = f"select * from info where exchange = '{exchange}' and code = '{code}'"
    conn = mdb.connectAShare()
    data = mdb.query(conn, sql)

    mdb.close(conn)
    return data
Example #7
    def testlastBaselined(self):
        ''' Builds a baseline document for inserting.'''

        # Connect and get handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)
        
        # Build a keyword object
        testKywd = kw(keyword='keyword1',
                   timeStamp=datetime.datetime(2011,6,22,12,10,45),
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')
        
        # Create a new baseline object
        baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)
        
        baseLine.outputs['days30_all']      = 0.5
        baseLine.outputs['days7_all']       = 0.4
        baseLine.outputs['hrs30_all']       = 0.3
        baseLine.outputs['days30_weekly']   = 0.2
        baseLine.outputs['days7_daily']     = 0.1
        
        doc = baseLine.buildDoc()
        bl.insertBaselineDoc(dbh, doc)
        
        # Method returns the date of last baseline calculation
        lastBaseline = baseLine.lastBaselined()
        self.assertEquals(lastBaseline, datetime.datetime(2011,6,22,12,10))

        # Close the connection
        mdb.close(c, dbh)
Example #8
def getExistingStocks():
    sql = "select distinct exchange, code from day_k"
    conn = mdb.connectAShare()

    rs = mdb.query(conn, sql)
    mdb.close(conn)

    return rs
Example #9
def main(configFile, subscriptionType, source):
    ''' Coordinates the retrieval of public CCTV camera URLs for crowded. '''
    
    # Get the config information into a single object
    p = getConfigParameters(configFile)
    
    #////////////////////////////////////////////////////////
    if source == 'cctv':
        url = p.tflUrl
    elif source == 'youtube':
        url = p.socialiseUrl
    elif source == 'flickr':
        url = p.viewFinderUrl
    else:
        # Add more sources here and to the config file
        print 'Unrecognised source: %s' % source
        return None
    #////////////////////////////////////////////////////////
        
    # Mongo connection parameters
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    collHandle = dbh['subs']
    evCollHandle = dbh['events']
    
    # Get the active subs
    activeSubs = getActiveSubs(collHandle, type=subscriptionType)

    # Barf at this point if there's nothing in subs
    if not activeSubs or len(activeSubs) < 1:
        mdb.close(c, dbh)
        return None

    # For each active active subscription, query by geography
    for aSub in activeSubs:
        
        print 'ASUB:', aSub
        if subscriptionType == 'geography':
            lon, lat = aSub['loc']
            radius = float(aSub['radius'])
            media = queryByGeo(url, lat, lon, radius)
        
        elif subscriptionType == 'tag':
            tag = aSub['objectId']
            media = queryByTag(url, tag)
        
        # For each of the images, update the correct event url list
        for image in media:
            # Mod the datetime into a python dt
            try:
                img = datetime.datetime.strptime(image['captured'], "%Y-%m-%dT%H:%M:%S")
            except Exception, e:
                img = datetime.datetime.strptime(image['published'], "%Y-%m-%dT%H:%M:%S")
            image['dt'] = img    
            
            success = updateEvents(evCollHandle, aSub['objectId'], image)
            if success == None:
                print "Failed to update event ID '%s' with media: \n %s" %(aSub['objectId'], image)
Example #10
def persistIndex(type, data):
    sql = "insert into idx(type, exchange, code, name, update_date) values(?, ?, ?, ?, ?)"
    indexes = []

    for index, row in data.iterrows():
        indexes.append((type, row['code'][:2], row['code'][3:],
                        row['code_name'], row['updateDate']))

    conn = mdb.connectAShare()
    mdb.executeMany(conn, sql, indexes)
    mdb.close(conn)
    print(f"{len(data)} {type} persisted")
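As a usage sketch: `data` is evidently a DataFrame whose `code` column holds exchange-prefixed codes (the `[:2]`/`[3:]` split suggests the `sh.600000` style), alongside `code_name` and `updateDate`. A hypothetical call:

import pandas as pd

# Hypothetical input; the column names mirror those indexed above.
data = pd.DataFrame({
    'code':       ['sh.600000', 'sz.000001'],
    'code_name':  ['Stock A', 'Stock B'],
    'updateDate': ['2023-01-02', '2023-01-02'],
})

persistIndex('hs300', data)   # prints "2 hs300 persisted"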
Example #11
def getDailyK(exchange, code, startDate=None, endDate=None):
    sql = f"select date, open, close, high, low, volume from day_k where exchange = '{exchange}' and code = '{code}'"
    if startDate != None:
        sql = sql + f" and date >= '{startDate}'"
    if endDate != None:
        sql = sql + f" and date <= '{endDate}'"
    sql = sql + " order by date"
    conn = mdb.connectAShare()
    data = mdb.query(conn, sql)

    mdb.close(conn)
    data.index = pd.to_datetime(data['date'])
    return data
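Because the frame comes back indexed by a real `DatetimeIndex`, the usual pandas time-series machinery applies directly. For example (a sketch with hypothetical exchange/code values, assuming the price columns are numeric or castable):

data = getDailyK('sh', '600000', startDate='2022-01-01', endDate='2022-12-31')

# 20-day simple moving average of the close.
data['ma20'] = data['close'].astype(float).rolling(20).mean()

# Month-end closing prices via the datetime index.
monthly = data['close'].astype(float).resample('M').last()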
Example #12
def persistInfo(data):
    sql = """insert into info(exchange, code, name, ipo_date, out_date, type, status)
             values('{exchange}', '{code}', '{code_name}', '{ipoDate}', {outDate}, {type}, {status})
          """

    conn = mdb.connectAShare()
    for index, row in data.iterrows():
        row['exchange'] = row['code'][:2]
        row['code'] = row['code'][3:]
        row['outDate'] = '"' + row['outDate'] + '"' if len(
            row['outDate']) > 0 else "NULL"
        mdb.execute(conn, sql.format(**row))

    mdb.close(conn)
    print(f"{len(data)} info persisted")
Example #13
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print "got DC environment settings."
    reParams = getRedisEnvironment()
    print "got redis environment settings."
    
    # Authenticate on the admin db
    try:
        c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
        print 'got handle'
    except:
        print "Failed to get handle under admin."
        sys.exit()
    # Authentication of the administrator
    #try:
    #    auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    #except Exception, e:
    #    print "Failed to authenticate with mongo db."
    #    print e
    
    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()
    
    try:
        # Authenticate on the admin db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
        print 'Connected to the normal db: %s' %(p.db)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()
    
    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print 'Writing out mongo config info.'
        writeConfigFileRedis(configFile, reParams)
        print 'Writing out redis config'
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)
    
    mdb.close(c, dbh)
Example #14
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # The collections provided and create them and their indexes
    for coll in p.collections:
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)
    
    mdb.close(c, dbh)
Example #15
    def testBuildFullArrayFlat(self):
        '''Build a full FLATTENED array from a cursor result'''
        
        st = datetime.datetime.utcnow()
        
        # A keyword that went in yesterday creates a timeseries yesterday
        nowDt = datetime.datetime(year=2011,month=1,day=12,hour=11,minute=1,second=1)
        oneDay= datetime.timedelta(days=1)

        # Get a db handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections
        # Build a keyword
        kword = kw(keyword='keyword1', timeStamp=nowDt-oneDay, lat=34.4, lon=45.5,
                           text='this text contained the hashtag #keyword1',
                           tweetID=346664, userID=4444, source='twitter')
        # New timeseries object
        ts = timeSeries()
        ts.importData(kword)
        success = ts.insertBlankDoc()
        
        # Insert 2ND DOC IN THE COLLECTION
        kword.timeStamp = nowDt 
        ts = timeSeries()
        ts.importData(kword)
        success = ts.insertBlankDoc()
        
        nowDate = nowDt.replace(hour=0,minute=0,second=0,microsecond=0) 
        
        # Last week's worth of documents
        resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', nowDate, 168)
        # Close the connection
        mdb.close(c, dbh)

        # Inputs
        period = datetime.timedelta(days=7)
        dates, data = bl.buildFullArray(resultSet, nowDate, period, 1)
        
        
        firstDay = dates[0]
        lastDay = dates[-1]
        

        self.assertEquals(data.shape[0], 11520)
        self.assertEquals(firstDay, nowDate - period)
        self.assertEquals(lastDay, nowDate)
Example #16
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # The collections provided and create them and their indexes
    for coll in p.collections:
        print "Building Collections and indexes: %s" %coll
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)
    
    mdb.close(c, dbh)
Example #17
    def testGetAllCountForOneCellLookback(self):
        ''' Gets a count for a single cell'''

       
        tweetTime = datetime.datetime(2011,1,2,12,5,15)
        oldTweetTime = tweetTime - datetime.timedelta(seconds=15*60)
        baselineTime = datetime.datetime(2011,1,2,12,0,0)
       
        # Get a db handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections
 
        # Build a keyword
        kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
                           text='this text contained the hashtag #keyword1',
                           tweetID=346664, userID=4444, source='twitter')

        # New timeseries object
        ts = timeSeries()
        ts.importData(kword)
        success = ts.insertBlankDoc()
        
        # Look back over the last 24 hours
        lookback = 24
        mgrs    = '38SND4595706622'
        qKeyword = 'keyword1'
        res = bl.getResultsPerCell(dbh,
                                   collection='timeseries', 
                                   mgrs=mgrs, 
                                   keyword=qKeyword, 
                                   inDate=baselineTime,
                                   lookback=lookback)
        print res
        
        results = []
        for doc in res:
            print doc
            results.append(doc)
            
        self.assertEqual(len(results), 1)
        
        # Close the connection
        mdb.close(c, dbh)
Example #18
    def InsertBlankDoc(self):
        ''' Checks the successful inserting of a mongo document '''
        
        # Get connection to mongo
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections        
        
        # New timeseries object with data
        ts = timeSeries()
        ts.importData(self.kw, blockPrecision=1)

        # Build and insert a new mongo formatted document
        success = ts.insertBlankDoc()
        self.assertEquals(success, 1)
        
        # Clean up and drop it
        #dbh.timeseries.remove()
        
        # Close the connection
        mdb.close(c, dbh)
Example #19
def getEvents(p):
    ''' Returns all currently active events in mongo '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    try:
        docs = evCollHandle.find(
            fields=['objectId', 'subType', 'start', 'loc', 'radius'])
        docsOut = [d for d in docs]

    except:
        print "No documents matched your query. Object ID: %s." % objectId
        docsOut = []
    mdb.close(c, dbh)

    # Additional fields that might be useful
    for doc in docsOut:
        # Get rid of the mongo ID
        _id = doc.pop('_id')

        if 'loc' in doc:

            # calculate the radius in metres
            latScale, lonScale = radialToLinearUnits(float(doc['loc'][1]))
            scale = (latScale + lonScale) / 2.0
            doc['radius_m'] = int(doc['radius'] * scale)

            # Calculate the top left, bottom right
            s = doc['loc'][1] - doc['radius']
            w = doc['loc'][0] - doc['radius']
            n = doc['loc'][1] + doc['radius']
            e = doc['loc'][0] + doc['radius']
            doc['bbox'] = [[w, s], [e, n]]

    return docsOut
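`radialToLinearUnits` isn't shown on this page; given that its two return values are averaged into a metres conversion, it evidently returns metres-per-degree scale factors at a latitude. A minimal stand-in under that assumption, using the standard spherical approximation:

import math

def radial_to_linear_units(lat_deg):
    ''' Hypothetical stand-in for radialToLinearUnits(): returns
        (metres per degree latitude, metres per degree longitude).
        A degree of latitude is ~111.32 km everywhere; a degree of
        longitude shrinks with cos(latitude). '''
    metres_per_deg_lat = 111320.0
    metres_per_deg_lon = 111320.0 * math.cos(math.radians(lat_deg))
    return metres_per_deg_lat, metres_per_deg_lon

Averaging the two and multiplying by a radius expressed in degrees gives the `radius_m` value stored above.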
Example #20
def persistDailyK(data):
    sql = """insert into day_k(exchange, date, code, open, high, low, close, pre_close, volume, 
             amount, adjust_flag, turn, trade_status, pct_chg, pe_ttm, pb_mrq, ps_ttm, pcf_nc_ttm, is_st)
             values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
          """

    kdata = []    
    for index, row in data.iterrows():
        kdata.append(
            (
                row['code'][:2],
                row['date'],
                row['code'][3:],
                row['open'],
                row['high'],
                row['low'],
                row['close'],
                row['preclose'],
                normalize(row['volume']),
                normalize(row['amount']),
                row['adjustflag'],
                normalize(row['turn']),
                row['tradestatus'],
                normalize(row['pctChg']),
                normalize(row['peTTM']),
                normalize(row['pbMRQ']),
                normalize(row['psTTM']),
                normalize(row['pcfNcfTTM']),
                row['isST']
            )
        )

    conn = mdb.connectAShare()
    curr = 0
    while curr < len(kdata):
        mdb.executeMany(conn, sql, kdata[curr:min(curr+1000, len(kdata))])
        curr += 1000

    mdb.close(conn)
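The `while` loop feeds `executeMany` in slices of 1,000 rows so that no single statement grows unboundedly. The same idea as a small reusable helper (a sketch, not part of the original module):

def chunks(seq, size=1000):
    # Yield successive slices of at most `size` items.
    for i in range(0, len(seq), size):
        yield seq[i:i + size]

# Equivalent to the while loop above:
#   for batch in chunks(kdata):
#       mdb.executeMany(conn, sql, batch)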
Example #21
def main():

    # Should really move these to being arguments
    parser = OptionParser()
    parser.add_option("-H", "--host",   dest="host")
    parser.add_option("-p", "--port",   dest="port")
    parser.add_option("-d", "--db",     dest="db")
    
    
    parser.add_option("-m", "--mgrs",               dest="mgrs")
    parser.add_option("-M", "--mgrsprecision",      dest="mgrsPrecision")
    parser.add_option("-t", "--timestamp",          dest="timeStamp")
    parser.add_option("-k", "--keyword",            dest="keyword")
    parser.add_option("-u", "--baselineUnit",       dest="baselineUnit")
    parser.add_option("-v", "--baselineValue",      dest="baselineValue")
    
    (options, args) = parser.parse_args()
    
    # Format the option inputs = these really should be arguments
    port              = int(options.port)
    timeStamp         = datetime.datetime.strptime(options.timeStamp, "%Y-%m-%dT%H:%M:%S")
    mgrsPrecision     = int(options.mgrsPrecision)
    baselinePrecision = [options.baselineUnit, int(options.baselineValue)]
    
    c, dbh = mdb.getHandle(host=options.host, port=port, db=options.db)
    
    # Build the baseline objects as we go so that they can be updated at the end of the period.
    base = baseline(options.mgrs, mgrsPrecision, options.keyword, timeStamp, c=c, dbh=dbh, baselinePrecision=baselinePrecision)

    # Does the baseline document need updating?
    if base.needUpdate == True:
        
        # This method takes care of update and insert
        base.processBaseline(tsd.buildBlankData())
        
    try:
        mdb.close(c, dbh)
    except:
        pass
Example #22
def main():
    ''' Builds the collections and indexes needed for the bam mongo work.
        # See also /src/tests/testMdb for full tests of the base functions. '''
    
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    #path = 'home/dotcloud/code/config/'
    file = "mongoSetup.cfg"
    params = getConfig(path,file)
    
    # Get a db handle
    if params.verbose==True:
        print "Get Mongo Handle."
    c, dbh = mdb.getHandle(host=params.host, port=params.port, db=params.db)

    # Set up collections
    if params.verbose==True:
        print "Setup the mongo collections."
    
    mdb.setupCollections(c, dbh, params.db, params.collections, params.dropDb)

    # Get the collection handles
    timeSeriesHandle = dbh[params.timeseries]
    baselineHandle   = dbh[params.baseline]
    alertsHandle     = dbh[params.alerts]
    mappingHandle    = dbh[params.mapping]
    
    # Set up the indexes on the collections
    if params.verbose==True:
        print "Setup the mongo indexes."
    
    setupTimeseriesIndexes(timeSeriesHandle, dropIndexes=params.dropIdx)
    setupAlertsIndexes(alertsHandle, dropIndexes=params.dropIdx)
    setupBaselineIndexes(baselineHandle, dropIndexes=params.dropIdx)
        
    # Close the connection
    if params.verbose==True:
        print "Closing the connection."
    
    mdb.close(c, dbh)
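The `setup*Indexes` helpers aren't reproduced here; with pymongo they presumably reduce to `create_index` calls over the fields the other examples filter on. A hypothetical sketch for the timeseries collection (the field choice is an assumption based on the `mgrs`/`keyword` queries elsewhere on this page):

import pymongo

def setup_timeseries_indexes(coll_handle, dropIndexes=False):
    # Hypothetical version of setupTimeseriesIndexes().
    if dropIndexes:
        coll_handle.drop_indexes()
    coll_handle.create_index([('mgrs', pymongo.ASCENDING),
                              ('keyword', pymongo.ASCENDING)])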
Example #23
    def testBuildFullArray(self):
        '''Build a full array from a cursor result'''
        
        # Get a db handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections
 
        # Build a keyword
        kword = kw(keyword='keyword1', timeStamp=datetime.datetime(2011,1,2,12,1,1), lat=34.4, lon=45.5,
                           text='this text contained the hashtag #keyword1', tweetID=346664, userID=4444, source='twitter')

        # New timeseries object
        ts = timeSeries()
        ts.importData(kword)
        success = ts.insertBlankDoc()
        
        # Insert the doc now that its been modified
        kword.timeStamp = datetime.datetime(2011,1,1,12,1,1) 
        ts = timeSeries()
        ts.importData(kword)
        
        success = ts.insertBlankDoc()
        
        # Last week's worth of documents
        resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', datetime.datetime(2011,1,2), 168)
        
        # Inputs
        inDate = datetime.datetime(2011, 1, 2, 0, 0)
        period = datetime.timedelta(days=7)
        flat = None
        
        dates, data = bl.buildFullArray(resultSet, inDate, period, flat)
        
        self.assertEquals(len(dates), 8)
        self.assertEquals(len(data), 8)        

        # Close the connection
        mdb.close(c, dbh)
Example #24
def getMediaByObjectId(p, objectId):
    ''' Gets a mongo doc back based on the object ID. Called by the display page. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # The query into mongo that should only return 1 doc
    query = {'objectId': objectId}
    doc = evCollHandle.find(query)

    try:
        doc = [d for d in doc][0]
    except:
        print "No document matched your query. Object ID: %s." % objectId
        doc = None
    mdb.close(c, dbh)

    return doc
Example #25
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    logging.info("Mongo Params:\n%s\n%s\n%s\n%s" %(dcParams.mongoHost, dcParams.mongoPort, dcParams.adminUser, dcParams.adminPass))
       
    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()
        
    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Authenticate on the admin db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()
        
    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)
        
    mdb.close(c, dbh)
Example #26
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    
    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()
    
    try:
        # Authenticate on the admin db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()
    
    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print "----- writing out new config parameters."
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)
    
    mdb.close(c, dbh)
Example #27
def main(configFile=None):
    ''' Coordinates the management functions
        Command line called, typically from a CRON.'''

    # Get the config file
    p = getConfigParameters(configFile)

    # Logging config
    logFile = os.path.join(p.errorPath, p.errorFile)
    logging.basicConfig(filename=logFile,
                        format='%(levelname)s:: \t%(asctime)s %(message)s',
                        level=p.logLevel)

    # Streaming client
    connClientPath = os.path.dirname(p.errorPath)
    p.streamClient = os.path.join(connClientPath, 'src/connectionClient.py')

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost,
                               port=p.dbPort,
                               db=p.db,
                               user=p.dbUser,
                               password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
        mgmtCollHandle = dbh[p.mgmtCollection]
        logging.debug("Connected and authenticated on the db.")
    except:
        logging.critical('Failed to connect to db and authenticate.',
                         exc_info=True)
        sys.exit()

    # Create a new management document if needed
    initialOID = setInitialPid(mgmtCollHandle)

    # Get the current events from crowded
    crowdedEvents = getCrowdedEvents(p)

    # Get the events currently stored by this app
    myEvents = getLocalEvents(p, evCollHandle)

    # Compare the 2 sets of events: what's old and new?
    oldEvents, newEvents = checkEvents(crowdedEvents, myEvents)

    # Expire old events from db, so that the new stream reflects the correct interest
    for oldEvent in oldEvents:
        print oldEvent
        logging.debug('Expiring Old Event in DB: %s' % (oldEvent))
        res = expireOldEvent(evCollHandle, oldEvent)

    # Create new item in the db
    for newEvent in newEvents:
        logging.debug('Creating New Event in DB: %s' % (newEvent))
        res = createLocalEvent(evCollHandle, newEvent)

    # Get the old process ID and kill it off
    pid = getPid(mgmtCollHandle)
    logging.debug('Current PID: %s' % (pid))

    # Only continue if there is a change in the events
    if len(oldEvents) > 0 or len(newEvents) > 0:

        if pid:
            logging.debug('Killing old process with ID: %s' % (pid))
            res = killOldProcess(pid)

        # Now create the new one
        newPid = processNewEvent(p)
        logging.debug('Creating a new process with PID: %s' % (newPid))

        # Update the current process id in mongo
        res = storePid(mgmtCollHandle, newPid)
        logging.debug('Stored the new PID: %s' % (res))

    mdb.close(c, dbh)
    logging.shutdown()
Example #28
def main(timeStamp=None):
    
    ''' Builds geoJson for the currently active cells and publishes/stores it. '''
    print 'in main'
    
    # Get the config params into a object
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    file = "periodicGeoAlert.cfg"
    params = getConfig(path,file)

    # Make the JMS connection via STOMP and the jmsCode class
    if params.publishJms:
        jms = jmsCode.jmsHandler(params.jHost, params.jPort, verbose=params.verbose)
        jms.connect()
        
    # Instantiate the mgrs lib
    m = mgrsLib.MGRS()

    # Time Variables
    if not timeStamp:
        now = datetime.datetime.utcnow()
    else:
        now = timeStamp
    
    nowMinute = getThisMinute(now)
    
    # Connect and get handle
    c, dbh = mdb.getHandle(params.mHost, params.mPort, params.mDb)
    
    # Assign collection handles to variables for easier passing
    baseCollHandle  = dbh[params.baseColl]
    tsCollHandle    = dbh[params.tsColl]
    mapCollHandle   = dbh[params.cellColl]
    
    # Retrieve the active cells
    activeCells = getActiveCells(baseCollHandle, timeStamp=now, lookback=params.lookback, mgrsPrecision=params.mgrsPrecision)
    
    fxx = open(os.path.join(path, 'outGeoJson.gjsn'), 'w')   # path lacks a trailing slash, so join it
    
    # Loop those active cells
    for activeCell in activeCells:
    
        kywd = activeCell['keyword']
        mgrs = activeCell['mgrs']
        print mgrs
        # The period for this count value
        duration = datetime.timedelta(seconds=params.lookback)
        print 'duration', duration
        # The coordinates of the polygon to be mapped from MGRS
        coords = buildPolygon(m, mgrs, params.mgrsPrecision)
        print 'coords: ', coords
        # The total count value for this mgrs/keyword/mgrsPrecision
        count = getCountsForActiveCells(tsCollHandle, nowMinute, params.lookback, mgrs, params.mgrsPrecision, kywd)
        print 'count: %s' %count
        # ANOMALY: Get a list of metrics that indicated it was anomalous
        #anomalies = checkForAnomalies(activeCell, count)
        anomalies = None
        
        # A geoJson object representing all of this information
        geoJson = buildGeoJson(kywd, coords, mgrs, params.mgrsPrecision, now, duration, count, anomalies)
        
        # ANOMALY: If it was anomalous, push the geoJson to JMS
        if params.publishJms == True:
            jms.sendData(params.jDestination, geoJson)
            fxx.write(geoJson+'\n')
            
        # Insert the geoJson into the mapping collection
        if params.storeCell == True:
            success = insertGeoJson(mapCollHandle, reformatGeoJsonTime(geoJson))
            print 'success: %s' %success
            
    #jms.disConnect()
    mdb.close(c, dbh)
    fxx.close()
Example #29
    def testProcessBaselineLast30Days(self):
        ''' Checks accurate population of an array for 30 day all '''
        
        # Connect and get handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)

        # Set up some times to work with
        tweetTime = datetime.datetime.utcnow()
        thisMinute = tweetTime.replace(second=0,microsecond=0)
        today = tweetTime.replace(hour=0, minute=0, second=0, microsecond=0)
        
        # Thirty days ago - at the start of the day
        lastMonthTweet = tweetTime - datetime.timedelta(days=30)
        
        # Build a keyword object
        testKywd = kw(keyword='keyword1',
                      timeStamp=lastMonthTweet,
                      lat=34.4, lon=45.5,
                      text='this text contained the hashtag #keyword1',
                      tweetID=346664, userID=4444, source='twitter')
        
        # Insert a new timeseries object for the tweet 30 days ago
        ts = timeSeries()
        ts.importData(testKywd)
        success = ts.insertBlankDoc()
        ts.updateCount()
        
        # Create a keyword object for the current tweet
        testKywd2 = kw(keyword='keyword1',
                       timeStamp=lastMonthTweet + datetime.timedelta(hours=1),
                       lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')
        
        # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
        ts2 = timeSeries()
        ts2.importData(testKywd2)
        success = ts2.insertBlankDoc()
        ts2.updateCount()
        
        # Create a keyword object for the current tweet
        testKywd3 = testKywd
        testKywd3.timeStamp = tweetTime
        # Instantiate the baseline object/class
        base = bl.baseline(kywd=testKywd3, cellBuildPeriod=600)
        if base.needUpdate == True:
            if not base.lastBaselined():
                doc = base.buildDoc()
                bl.insertBaselineDoc(dbh, doc)
        
        # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
        ts3 = timeSeries()
        ts3.importData(testKywd3)
        success = ts3.insertBlankDoc()
        ts3.updateCount()

        tweetTimeMinus2Days = tweetTime - datetime.timedelta(days=2)
        
        # Create a new keyword object to test the daily slicing
        testKywd5 = kw(keyword='keyword1',
                       timeStamp=tweetTimeMinus2Days,
                       lat=34.4, lon=45.5,
                       text='this text contained the hashtag #keyword1',
                       tweetID=346664, userID=4444, source='twitter')
        
        # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
        ts5 = timeSeries()
        ts5.importData(testKywd5)
        success = ts5.insertBlankDoc()
        ts5.updateCount()

        # Process Baseline
        base.processBaseline()
        
        # Get back the 30 day array
        arr = base.test30DayArray
        
        # Calculate what the array length should be
        soFarToday = (thisMinute - today).seconds/60.0
        
        # The start of the array datetime
        lastMonthDay = lastMonthTweet.replace(hour=0, minute=0, second=0, microsecond=0)
        
        # The number of days between today and the start of the array (then in minutes)
        dateDiff = (today - lastMonthDay)
        minsDiff = dateDiff.days*1440 + dateDiff.seconds/60.0 
        total = minsDiff + soFarToday
        
        # Confirm its the right length
        self.assertEqual(total, len(arr))
        
        # Get the minutes for the first 2 keywords (the third shouldn't be there)
        kwd1Min = int((testKywd.timeStamp - lastMonthDay).seconds/60)
        kwd2Min = int((testKywd2.timeStamp - lastMonthDay).seconds/60)
        
        kwd1Test = [arr[kwd1Min-1], arr[kwd1Min], arr[kwd1Min+1]]
        kwd2Test = [arr[kwd2Min-1], arr[kwd2Min], arr[kwd2Min+1]]
        
        for j in range(len(arr)):
            if arr[j] > 0:
                print j, arr[j]
        
        self.assertEquals(kwd1Test, [0,1,0])
        self.assertEquals(kwd2Test, [0,1,0])
        
        # 30 DAY TIME SLICE CHECK
        arr = base.test30DaySliced
        # weekly 
        testSliced = int(30/7) * 6 * 60
        self.assertEquals(testSliced, len(arr))
        
        arr7Day = base.test7DayArray
        test7DayAll = (thisMinute - today).seconds/60.0 + 1440*7
        self.assertEquals(len(arr7Day), int(test7DayAll))
        
        arr30Hrs = base.test30hrArray
        test30Hours = 30*60
        self.assertEquals(len(arr30Hrs), int(test30Hours))
        
        # Close the connection
        mdb.close(c, dbh)
Example #30
def getIndex(type):
    conn = mdb.connectAShare()
    data = mdb.query(conn, f"select * from idx where type = '{type}'")

    mdb.close(conn)
    return data
Example #31
def main(): 
    '''
    Script to build tweet objects from the VAST dataset and place them on a Queue and/or JMS
    for testing purposes.
    
    LIKELY SPEED IMPROVEMENTS:
    - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
    - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
    - ANY DUPLICATION CHECKS?
    '''
    db = 'bam'
    host = 'localhost'
    port = 27017
    
    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)
    
    blUnits     = 'minute'
    blPrecision = 10
    baselineParameters = [blUnits, blPrecision] 
    mgrsPrecision = 2
    
    #dripRate = 1.5
    
    # JMS destination
    #destination = '/topic/test.vasttweets'
    #hostIn      = 'localhost'
    #portIn      = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections
    dbh = mdb.setupIndexes(dbh)
    
    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    # Make the JMS connection via STOMP and the jmsCode class
    #jms.connect()
     
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName= "MicroblogsSample.csv"
    fName= "MicroblogsOrdered.csv"
    tweetStats = 'tweetStatsFile_50000.csv'
    tptFile = open(path+tweetStats, 'w')
    
    # The script used to generate the baseline
    baselinePath = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/src/scripts/'
    baselineScript = 'subprocessBaseline.py'
    scriptFile = os.path.join(baselinePath, baselineScript)

    
    f = retrieveFile(path, fName)
    x = 0
    
    # Start time
    earliestTweet = datetime.datetime(2011, 4, 30, 0, 0)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime
    
    # This speeds things up from seconds to minutes
    speedUpRate = 1000
    
    # Build a blank timeseries array to save it being built every time
    blankData = buildBlankData(hours=24)
    
    # Loop the lines build tweet objects
    for line in f.readlines():
        
        #print line
        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')

        if x == 0:
            x+=1
            continue
        
        if x % 100 == 0:
            print "processed: ", x
        
        if x > 100000:
            print line
            break
            
        line = line.split(',')
        
        tweetProcessStart = datetime.datetime.utcnow()
        
        tweetId, dt, latLon, text = line
        
        # Get the geos
        geos = getGeos(tweetId, latLon)
        if not geos:
            print "skipping this record - bad or no geos"
            continue
        
        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)

        if not dtg:
            print "skipping this record - bad or no time"
            continue
        
        # Get the tweettime into seconds from UNIX
        tweetTime = time.mktime(time.struct_time(dtg.timetuple()))
        #print "The time of this tweet", tweetTime
        
        # Get the tweet time in seconds since the last tweet
        sinceLastTweet = tweetTime - lastTweetTime
        #print "Time since last tweet", sinceLastTweet
        
        #delay = sinceLastTweet / speedUpRate
        #print "Delay: ", delay
                
        # Apply a scaling to it
        #time.sleep(delay)
        
        # Assign this tweet time to be the last tweet time
        lastTweetTime = tweetTime
        
        # Build a tweet object
        twt = vastTweet()
        twt.importData(timeStamp=dtg, lat=geos[0], lon=geos[1], text=text, tweetId=tweetId)
        
        #----------------------------------------------------------------------------------
        # PROCESS INTO KEYWORDS
                
        # Build into keywords - skipping a step for development
        kywd = processTweet(twt, mgrsPrecision)
        
        # Add keywords to the list based on hashtags
        kywd.fromHashTag()
        
        # Add keywords to the list based on name lookup
        kywd.fromLookup()

        if len(kywd.keywords) == 0:
            pass
            #print "No matches: ", twt.text
        
        xx = 0
        #Now loop the resultant keywords
        for kwObj in kywd.keywords:
            
            xx += 1
            
            #print "------------------"
            #print kwObj.keyword
            #print kwObj.text
        
            #-------------------------------------------------------
            # Pass keyword object into a class
            #ts = timeSeries(host='localhost', port=27017, db='bam')
            ts = timeSeries(c=c, dbh=dbh)
            ts.importData(kwObj, blockPrecision=24)
    
            success = ts.insertDoc(blankData=blankData, incrementBy=100)
  
            callBaseliner(scriptFile, host, port, db, kwObj, baselineParameters, mac=1)
  
        # METRICS - currently about 0.05 seconds per tweet
        tweetProcessStop = datetime.datetime.utcnow()
        tweetProcessTimes += (tweetProcessStop - tweetProcessStart)
        processDif = (tweetProcessStop - tweetProcessStart) 
        tptFile.write(str(x)+","+str(xx)+","+str(processDif.seconds + processDif.microseconds/1000000.)+"\n")
        #----------------------------------------------------------------------------------
        # SEND TO JMS WITH THIS CODE

        # Convert it into a JSON object
        #jTwt = twt.vastTweet2Json()
        #print jTwt

        # Push the JSON version of the tweet to the JMS
        #jms.sendData(destination, jTwt, x)

        #----------------------------------------------------------------------------------
        
        x += 1
    
        #time.sleep(dripRate)
        
    # Disconnect from the JMS
    #jms.disConnect()    

    end = datetime.datetime.utcnow()
    dif = end - start
    
    print "Total Tweet Process Time: %s" %tweetProcessTimes.seconds
    print "Average Tweet process time: %s" % (float(tweetProcessTimes.seconds)/float(x))

    print "Tweet Processed: %s" %x
    print "Total Process Time: %s" %(dif)
    
    # Close the mongo connection
    mdb.close(c, dbh)
    f.close()
    tptFile.close()
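The commented-out `delay`/`dripRate` lines show how the replay throttle was meant to work: sleep for the real gap between consecutive records divided by `speedUpRate`. Reconstructed as a self-contained sketch (an illustration of the technique, with timestamps assumed to be seconds since the epoch):

import time

SPEED_UP_RATE = 1000.0   # replay 1000x faster than real time

def replay(records):
    # records: an iterable of (timestamp_seconds, payload) pairs in time order.
    last = None
    for ts, payload in records:
        if last is not None:
            time.sleep(max(0.0, (ts - last) / SPEED_UP_RATE))
        last = ts
        yield payload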
Example #32
def buildSubscription(event):
    ''' Builds a new subscription based on a GET-called event. '''

    # Placeholder for doing this by users/algorithm?
    user = '******'

    cwd = os.getcwd()
    cfgs = os.path.join(cwd, 'config/crowded.cfg')
    p = getConfigParameters(cfgs)

    #print "Config Filepath in buildSubscription: ", cfgs

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    subsCollHandle = dbh[p.subsCollection]
    evCollHandle   = dbh[p.eventsCollection]
                
    # Check whether we definitely need a new subscription or not    
    checked = checkForExistingSubs(p, subsCollHandle, event)
    
    # If the subscription doesn't already exist, 
    if checked['exists'] == False:
        
        # Get the client and secret keys
        api = InstagramAPI(client_id=p.client, client_secret=p.secret)
    
        # If it's a geo-based subscription
        if event['object'] == 'geography':
            res = api.create_subscription(object='geography', lat=event['lat'], lng=event['lon'], radius=event['radius'],
                                    aspect='media', callback_url=p.subBaseUrl)
            print "Geo Subscription setup: %s" %res
        # A tag-based subscription
        elif event['object'] == 'tag':
            res = api.create_subscription(object='tag', object_id=event['tag'], aspect='media', callback_url=p.subBaseUrl)
            print "Tag Subscription setup: %s" %res
        # Just in case
        else:
            print 'Did not set up a subscription'
            res = None
    
        # Update the subscription collection 
        if res and res['meta']['code']==200:
    
            data = res['data']
            subType  = data['object'] 
            objectId = data['object_id'] 
            subId    = data['id']
            aspect   = data['aspect']
            success = updateSubs(subsCollHandle, subType, subId, objectId, aspect, event, user)
            
            # Build the response 
            response = {'success'  : True,
                        'objectId' : objectId,
                        'object'   : subType,
                        'url'      : "%s/%s" %(p.baseUrl, success)}
        
            # Insert a blank document to populate
            _id = buildEventPlaceholder(evCollHandle, subType, event, objectId)
            
        # Something failed in the subscription build...?
        else:
            print '='*40
            print 'Failed here. No event placeholder or subscription updated.'
            print res
            print '='*40
            response = {'success'  : False,
                        'objectId' : checked['objectId'],
                        'object'   : checked['object'],
                        'url'      : "%s/%s" %(p.baseUrl, checked['objectId'])}
    
    # A valid subscription already exists 
    elif checked['exists'] == True:
        response = {'success'  : True,
                    'objectId' : checked['objectId'],
                    'object'   : checked['object'],
                    'url'      : "%s/%s" %(p.baseUrl, checked['objectId'])}

    # Close the connection/handle
    mdb.close(c, dbh)

    return response
Example #33
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    p = getConfigParameters(configFile)

    # Get a db handle
    if p.verbose==True:
        print "---- Geting Mongo Handle."
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db)
    
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e

    # The collections provided and create them and their indexes
    for coll in p.collections:
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)
    
    mdb.close(c, dbh)
    
if __name__ == "__main__":

    # Command line arguments - the first is the config file path
    if len(sys.argv) < 2:
        print 'No config file provided. Exiting.'
        sys.exit()
    configFile = sys.argv[1]

    main(configFile)