Beispiel #1
0
    def setUp(self):
        ''' Store the connection settings and instantiate the jmsHandler from them. '''

        # Host and port are kept on the instance as well as being passed to the handler
        self.hostIn, self.portIn = 'localhost', 61613

        self.jms = jmsCode.jmsHandler(
            self.hostIn,
            self.portIn,
            verbose=True,
        )
def main(p, response):
    '''Handles the subscription updates, including making the call to the endpoint and dumping to jms/text.

    p        - config object. Read here: jmsBase, jmsHost, jmsPort, jmsDest, fileBase, configFile.
               It is also handed on to the url/pagination helpers.
    response - JSON string decoding to a list of update dictionaries, each carrying
               'object' and 'object_id' keys.

    Each photo returned for an update is serialised to JSON and, depending on the config,
    written to <dir of p.configFile>/<photo id>.json and/or sent to p.jmsDest.
    The JMS connection is always closed again, even if processing an update fails.
    '''

    # Kept as explicit comparisons with True (as they always were) rather than truthiness,
    # so a non-boolean config value (e.g. a string) still does not switch the output on
    useJms = p.jmsBase == True
    useFile = p.fileBase == True

    # Make the JMS connection via STOMP and the jmsCode class
    jms = None
    if useJms:
        import jmsCode
        jms = jmsCode.jmsHandler(p.jmsHost, p.jmsPort, verbose=True)
        jms.connect()

    try:
        # If the config says save it out to file, files go next to the config file
        if useFile:
            outDir = os.path.dirname(p.configFile)

        # Accepts a list of dictionaries - the update message
        updates = json.loads(response)

        # Format the url and get the media metadata
        for upd in updates:

            # Does the next URL already exist for this object?
            url = getNextUrl(p, upd['object_id'])

            # If the next (ie this) url hasn't been written to a file, build it from the config file
            if url is None:
                url = buildUrl(p, upd['object'], upd['object_id'])

            # Get the media that has changed since the last time
            mediaMeta = getMediaUpdates(url)

            # Find the pagination info and save out the next url for this subscription
            handleMediaPagination(p, url, upd['object_id'], mediaMeta)

            # Format the content - make it like a tweet - and handle each referenced photo
            for photo in formatMetadata(mediaMeta):

                # Dump the media metadata out to a string
                jPhoto = json.dumps(photo, ensure_ascii=True)

                # Write the json for this photo out to file; 'with' closes it even if the write fails
                if useFile:
                    outPath = os.path.join(outDir, str(photo['id']) + '.json')
                    with open(outPath, 'w') as f:
                        f.write(jPhoto)

                # Put the metadata onto the JMS
                if useJms:
                    jms.sendData(p.jmsDest, jPhoto, photo['id'])
    finally:
        # Close the jms connection whether or not the updates were processed cleanly
        if jms is not None:
            jms.disConnect()
        
Beispiel #3
0
def main(timeStamp=None):

    '''
    Builds a geoJson record for every active cell and publishes and/or stores it.

    timeStamp - value to treat as "now"; when not given (or falsy) the current UTC time is used.

    For each active cell (keyword + mgrs) the polygon coordinates and the count over the
    lookback period are gathered into a geoJson string. If params.publishJms is True the
    geoJson is sent to params.jDestination and appended to outGeoJson.gjsn in the config
    directory; if params.storeCell is True it is inserted into the mapping collection.
    The JMS connection, the database handle and the output file are released on exit,
    including when processing fails part way through.
    '''
    # Local import so the path join below does not rely on a file-level import of os
    import os

    print('in main')

    # Get the config params into a object
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    cfgName = "periodicGeoAlert.cfg"
    params = getConfig(path, cfgName)

    # Make the JMS connection via STOMP and the jmsCode class
    jms = None
    if params.publishJms:
        jms = jmsCode.jmsHandler(params.jHost, params.jPort, verbose=params.verbose)
        jms.connect()

    # Handles that need releasing in the finally clause, once they exist
    c = dbh = fxx = None

    try:
        # Instantiate the mgrs lib
        m = mgrsLib.MGRS()

        # Time Variables
        now = timeStamp or datetime.datetime.utcnow()
        nowMinute = getThisMinute(now)

        # Connect and get handle
        c, dbh = mdb.getHandle(params.mHost, params.mPort, params.mDb)

        # Assign collection handles to variables for easier passing
        baseCollHandle = dbh[params.baseColl]
        tsCollHandle = dbh[params.tsColl]
        mapCollHandle = dbh[params.cellColl]

        # Retrieve the active cells
        activeCells = getActiveCells(baseCollHandle, timeStamp=now, lookback=params.lookback, mgrsPrecision=params.mgrsPrecision)

        # path has no trailing separator, so join it properly: plain concatenation
        # produced '.../bam/configoutGeoJson.gjsn' next to the config directory
        fxx = open(os.path.join(path, 'outGeoJson.gjsn'), 'w')

        # Loop those active cells
        for activeCell in activeCells:

            kywd = activeCell['keyword']
            mgrs = activeCell['mgrs']
            print(mgrs)

            # The period for this count value
            duration = datetime.timedelta(seconds=params.lookback)
            print('duration %s' % (duration,))

            # The coordinates of the polygon to be mapped from MGRS
            coords = buildPolygon(m, mgrs, params.mgrsPrecision)
            print('coords:  %s' % (coords,))

            # The total count value for this mgrs/keyword/mgrsPrecision
            count = getCountsForActiveCells(tsCollHandle, nowMinute, params.lookback, mgrs, params.mgrsPrecision, kywd)
            print('count: %s' % count)

            # ANOMALY: anomaly detection is currently switched off, so none are reported
            #anomalies = checkForAnomalies(activeCell, count)
            anomalies = None

            # A geoJson object representing all of this information
            geoJson = buildGeoJson(kywd, coords, mgrs, params.mgrsPrecision, now, duration, count, anomalies)

            # Push the geoJson to JMS; the file copy is only written when publishing is on
            if params.publishJms == True:
                jms.sendData(params.jDestination, geoJson)
                fxx.write(geoJson + '\n')

            # Insert the geoJson into the mapping collection
            if params.storeCell == True:
                success = insertGeoJson(mapCollHandle, reformatGeoJsonTime(geoJson))
                print('success: %s' % success)

    finally:
        # Release whatever was acquired, even if a cell failed part way through
        if fxx is not None:
            fxx.close()
        if c is not None:
            mdb.close(c, dbh)
        # jms only exists when publishing was enabled, hence the guard
        if jms is not None:
            jms.disConnect()
Beispiel #4
0
def _buildTweetJson(template, tweetId, geos, dtg, text):
    ''' Fills a copy of the tweet template for one record and returns it as a JSON string.

        geos is indexed as [0] and [1] and written out in [1], [0] order;
        dtg must provide strftime(). '''

    # Deep copy: the nested 'user' and 'coordinates' dicts are written to below and
    # a shallow copy would write straight through into the shared template
    tweet = copy.deepcopy(template)
    tweet['app'] = 'twitter'
    tweet['user']['id'] = tweetId
    tweet['coordinates']['coordinates'] = [geos[1], geos[0]]
    tweet['created_at'] = dtg.strftime('%a %b %d %H:%M:%S +0000 %Y')
    tweet['text'] = text

    #==============================================================================
    # Could put in here something to add in a platform (mobile vs static, etc)
    #==============================================================================

    try:
        return json.dumps(tweet)
    except UnicodeError:
        # Text that cannot be encoded as-is is retried after decoding it as latin-1
        tweet['text'] = text.decode('latin-1')
        return json.dumps(tweet)


def main():
    ''' Puts json representations of tweets onto a JMS for later processing into keywords, etc.
        Currently configured to work with the VAST dataset rather than a live tweet stream.

        Reads recordTemplate.json from the config path, then walks MicroblogsOrdered.csv
        line by line (first line skipped), sending one JSON tweet per usable line to
        /queue/tweets. Lines whose geo or time cannot be resolved are skipped.
        A line with fewer than four comma separated fields raises ValueError.
        The JMS connection and the data file are closed on every exit path. '''

    startTimer = datetime.datetime.utcnow()
    lastTimer = datetime.datetime.utcnow()

    # JMS PARAMETERS
    destination = '/queue/tweets'
    hostIn      = 'localhost'
    portIn      = 61613
    # File Path Info
    path       = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    configPath = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/"
    fName = "MicroblogsOrdered.csv"

    # Read in a twitter template; 'with' makes sure the template file is closed again
    templateName = 'recordTemplate.json'
    with open(os.path.join(configPath, templateName)) as templateFile:
        template = json.loads(templateFile.read())

    # Make the JMS connection via STOMP and the jmsCode class
    jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    jms.connect()

    try:
        f = retrieveFile(path, fName)
        try:
            x = 0

            # Loop the lines build tweet objects
            for line in f.readlines():

                # Strip the line ending before splitting
                line = line.rstrip('\r\n')

                # Skip the first line (presumably the csv header - confirm)
                if x == 0:
                    x += 1
                    continue

                if x % 2000 == 0:
                    print("Processed:  %s" % x)
                    counterTimer = datetime.datetime.utcnow()
                    intermediateTimer = counterTimer - lastTimer
                    lastTimer = datetime.datetime.utcnow()
                    print("2,000 taken: %s" % intermediateTimer)

                    # NOTE(review): the run stops for good at the first multiple of 2,000,
                    # which looks like a debugging cap - confirm it is still wanted.
                    # The finally clauses below do the disconnect/close on the way out.
                    sys.exit()

                # Split on the first three commas only, so commas inside the tweet
                # text stay part of the text instead of breaking the unpacking
                tweetId, dt, latLon, text = line.split(',', 3)

                # Get the geos
                geos = getGeos(tweetId, latLon)
                if not geos[0]:
                    continue

                # Get the datetime group into seconds since UNIX time
                dtg = getTime(tweetId, dt)
                if not dtg:
                    continue

                tweetJson = _buildTweetJson(template, tweetId, geos, dtg, text)
                jms.sendData(destination, tweetJson, x)

                #time.sleep(10)
                x += 1
        finally:
            f.close()
    finally:
        # Disconnect from the topic
        jms.disConnect()

    stopTimer = datetime.datetime.utcnow()
    takenTimer = stopTimer - startTimer
    print("TimeTaken: %s" % takenTimer)