# Example 1
class LocalTouristClassifier():
    def __init__(self):
        self.tweets = Connection().tweetsDB.tweetsCollection
        self.tweetUsers = Connection().tweetsDB.tweetUsersCollection
        self.tweetUsers.ensure_index( [("loc", GEO2D )] )
        
        self.photos = Connection().flickrDB.flickrCollection
        self.linked = Connection().linkedDB.linkedCollection
        
        API_KEY = 'dj0yJmk9UUY5TWxNMXBRb0M3JmQ9WVdrOVV6RlVOWFEzTjJzbWNHbzlNVGMzTVRBNE5EazJNZy0tJnM9Y29uc3VtZXJzZWNyZXQmeD0zYQ--'
        SHARED_SECRET = '92a96753c369996f18b6a2ef4a6b1b9c85de04f5'
        self.y = yql.TwoLegged(API_KEY, SHARED_SECRET)
        self.yqlCache = {}
        
    def unescape_html_chars(self, item):
        return item.replace("&amp;", "&").replace("&gt;", ">").replace("&lt;", "<").replace("&quot;", "\"")
        

        
    def classifyTwitter(self):
        for tweet in self.tweets.find({"place":self.place}):
            if tweet['fromUserID'] is not None:
                tweetUser = self.tweetUsers.find_one({'id':tweet['fromUserID']})
                if tweetUser is not None:
                    tweetUserLocation = tweetUser['location']
                    if tweetUserLocation is not None and tweetUser['loc'] is None:
                        tweetUserLocation = tweetUserLocation.encode('utf-8')
                        #print "%s || %s" % (tweetUserLocation, self.place)
                        # we use the yqlCache local dictionary to use as few calls as possible
                        if self.yqlCache.get(tweetUserLocation) is not None and self.yqlCache[tweetUserLocation] != 0:
                            tweetUser['loc'] = self.yqlCache[tweetUserLocation]
                            print 'cacheSuccess: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], tweetUser['loc'])
                        else:
                            # send request out to YQL
                            yqlQuery = 'select * from geo.placefinder where text="%s";' % tweetUserLocation
                            try:
                                yqlResult = self.y.execute(yqlQuery)
                                if yqlResult.rows == []:
                                    # yql couldn't figure out where this is, so don't save a loc
                                    self.yqlCache[tweetUserLocation] = 0
                                    print 'fail: %20s %s' % (tweetUserLocation, tweetUser['id'])
                                else:
                                    # yql found a lat and lon, so let's tag it
                                    loc = [float(yqlResult.rows[0].get('latitude')), float(yqlResult.rows[0].get('longitude'))]
                                    tweetUser['loc'] = loc
                                    self.yqlCache[tweetUserLocation] = loc
                                    print 'success: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], loc)
                            except:
                                print "Exception Detected:", sys.exc_info()[0]
                                
                        # ready to save user
                        self.tweetUsers.save(tweetUser)
# Example 2
from pymongo import Connection
import cyclone.web, cyclone.websocket
from twisted.internet import reactor
from describelocation import describeLocationFull, metersFromHome
# Root logger for this service (the logging module is imported outside
# this chunk -- TODO confirm).
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# RDF namespaces: map vocabulary and XML Schema datatypes.
MAP = Namespace("http://bigasterisk.com/map#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
# stategraph lives outside the normal package path, so extend sys.path
# before importing it (mid-file import is deliberate here).
sys.path.append("/my/site/magma")
from stategraph import StateGraph

# Private config is read at import time; 'mongo' holds host/port/db/collection.
config = json.loads(open("priv.json").read())
m = config['mongo']
mongo = Connection(m['host'], m['port'])[m['db']][m['collection']]
mongo.ensure_index([('recv_time', 1)])

# Default ordering: newest-first by the point's own client timestamp.
TIME_SORT = ('timestamp', -1)
def pt_sec(pt): return pt['timestamp']

if 0:
    # owntracks is stalling on the 'tst' time value, but sending mostly ok data
    # Manual toggle: fall back to server receive time when the client
    # timestamps are unreliable.
    TIME_SORT = ('recv_time', -1)
    def pt_sec(pt): return pt['recv_time']
    

class Trails(cyclone.web.RequestHandler):
    # HTTP handler: GET with a 'q' argument returns an update message
    # for that query.
    def get(self):
        # 'q' is JSON-encoded; presumably a mongo query dict -- TODO confirm
        # against getUpdateMsg (defined elsewhere in this file).
        q = json.loads(self.get_argument('q'))
        self.write(getUpdateMsg(query=q))
# Example 3
    proc_col.ensure_index('most_recent_event_timestamp')

    # For time range searches!  This multi-key index ensures fast
    # searches for creation_time alone too!
    proc_col.ensure_index([('creation_time', ASCENDING),
                           ('exit_time', ASCENDING)])

    # Indexes over nested phase documents and their per-file event
    # timestamp fields.
    proc_col.ensure_index('phases.name')
    proc_col.ensure_index('phases.start_time')
    proc_col.ensure_index('phases.files_read.timestamp')
    proc_col.ensure_index('phases.files_written.timestamp')
    proc_col.ensure_index('phases.files_renamed.timestamp')

    # index all collections by session_tag:
    for c in all_cols:
        c.ensure_index('session_tag')

    # one-shot mode is useful for debugging or running on archival logs
    if options.one_shot:
        do_incremental_index()
        sys.exit(0)

    # Route SIGTERM through exit(1) so the atexit-registered handler
    # still runs on an external shutdown, not just a normal exit.
    atexit.register(exit_handler)
    signal(SIGTERM,
           lambda signum, frame: exit(1))  # trigger the atexit function to run

    # this loop can only be interrupted by exit_handler()
    while True:
        # sleep first so that we can give the logs some time to build up at
        # the beginning of a login session ...
        time.sleep(INDEXING_PERIOD_SEC)
# Example 4
  proc_col.ensure_index('exited')
  proc_col.ensure_index('most_recent_event_timestamp')

  # For time range searches!  This multi-key index ensures fast
  # searches for creation_time alone too!
  proc_col.ensure_index([('creation_time', ASCENDING), ('exit_time', ASCENDING)])

  # Indexes over nested phase documents and their per-file event
  # timestamp fields.
  proc_col.ensure_index('phases.name')
  proc_col.ensure_index('phases.start_time')
  proc_col.ensure_index('phases.files_read.timestamp')
  proc_col.ensure_index('phases.files_written.timestamp')
  proc_col.ensure_index('phases.files_renamed.timestamp')

  # index all collections by session_tag:
  for c in all_cols:
    c.ensure_index('session_tag')


  # one-shot mode is useful for debugging or running on archival logs
  if options.one_shot:
    do_incremental_index()
    sys.exit(0)


  # Route SIGTERM through exit(1) so the atexit-registered handler
  # still runs on an external shutdown, not just a normal exit.
  atexit.register(exit_handler)
  signal(SIGTERM, lambda signum,frame: exit(1)) # trigger the atexit function to run

  # this loop can only be interrupted by exit_handler()
  while True:
    # sleep first so that we can give the logs some time to build up at
    # the beginning of a login session ...
# Example 5
                '-o', psFile.name])
    svgFile = NamedTemporaryFile(suffix='.svg')
    check_call(['pstoedit',
                '-f', 'plot-svg',
                '-yshift', '580',
                '-xshift', '20',
                psFile.name, svgFile.name])
    lines = open(svgFile.name).readlines()
    return ''.join(lines[2:])

def codeElem(s):
    """Wrap the barcode markup produced for *s* in a styled <div>."""
    markup = makeCode(s)
    return '<div class="bc">{0}</div>'.format(markup)

# Collection mapping MPD music paths to small integer ids, indexed on
# the path for fast lookup in idForMpdPath below.
mpdPaths = Connection("bang", 27017)['barcodePlayer']['mpdPaths']
# {mpdPath:"music/path/to/album/or/song", "_id":12}
mpdPaths.ensure_index([('mpdPath', 1)])
def idForMpdPath(p):
    """Return the integer id for mpd path *p*, allocating the next
    sequential id (starting at 0) when the path is new.

    NOTE(review): the read-max-then-insert sequence is not atomic; two
    concurrent callers could allocate the same id -- confirm single
    writer before relying on uniqueness.
    """
    existing = mpdPaths.find_one({"mpdPath": p})
    if existing is not None:
        return existing['_id']

    # Allocate one past the current highest id (or 0 for an empty
    # collection), then persist the new mapping.
    newest = list(mpdPaths.find().sort([('_id', -1)]).limit(1))
    if newest:
        allocated = newest[0]['_id'] + 1
    else:
        allocated = 0
    mpdPaths.insert({"mpdPath": p, "_id": allocated})
    return allocated
            

out = open("out.xhtml", "w")
out.write("""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">