Example #1
  def post(self):
    user = self.get_user()
    if not user:
      return

    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')
    csv_data = self.request.get('csv_data')

    user = User.get_by_email(user_email)
    query = Query.get_by_id(query_id)

    for duple in self.parse_csv_data(csv_data):
      timestamp = duple[0]
      text = duple[1]

      # for testing
      self.response.out.write("<p>%s: %s\n</p>" % (timestamp, text))

      dp = DataPoint(
        timestamp = timestamp,
        query = query,
        text = text)
    
      dp.lt_put()

    self.redirect('/data')
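
Example #1 relies on a parse_csv_data helper that is not part of the snippet. A minimal sketch, assuming one "timestamp,text" pair per line (the real helper may also convert the timestamp string into a datetime before the DataPoint is built):

  # Hypothetical sketch -- the real parse_csv_data is not shown here.
  # Assumes each CSV line is "timestamp,text".
  def parse_csv_data(self, csv_data):
    duples = []
    for line in csv_data.splitlines():
      if not line.strip():
        continue
      timestamp, _, text = line.partition(',')
      duples.append((timestamp.strip(), text.strip()))
    return duples
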
Example #2
  def post(self):
    user = self.get_user()
    if not user:
      return

    query_id = self.request.get('query_id')
    #user_email = self.request.get('user')
    data = self.request.get('data')
    time = self.request.get('time') #self.request.get('time', '')

    # get the question from the DB
    # get the user from the DB
    # add a Response object to the DB
    
    #user = User.get_by_email(user_email)
    query = db.get(query_id) #hmmm

    dp = DataPoint(
      text = data,
      query = query,
      timestamp = datetime.now()
    )

    dp.lt_put()
    ActionLog.log('NewDatapoint', query.user, query.name)

    self.response.out.write('Got it!')
Example #3
  def post(self):
    ActionLog.log('ReceivedSMS')

    sender_phone = self.request.get('From')
    body = self.request.get('Body')

    # normalize the phone number to 10 digits
    # '+12268681112' => '2268681112'
    sender_phone = sender_phone[-10:]
    logging.info('Got text from ' + sender_phone)

    # get the user
    user = User.get_by_phone(sender_phone)
    if not user:
      logging.error("Couldn't get the user for phone: " + sender_phone)
      return

    # parse the response
    query_value_pairs = self.parse_body(body)

    for query_name in query_value_pairs:
      value = query_value_pairs[query_name]

      if query_name == '' or value == '':
        logging.error('Got a bad response')
        return

      query = Query.get_by_user_and_name(user, query_name)

      if not query:
        logging.error("Couldn't get query for user " + user.email + ' and query ' + query.name)
        continue

      timestamp = datetime.now()
      
      dp = DataPoint(
        user = user,
        query = query,
        text = value,
        timestamp = timestamp,
      )

      dp.lt_put()

      ActionLog.log('NewDatapoint', user, query.name)

      query.refresh()

      logging.info('Received datapoint ' + query_name + ': ' + value + '\n')

    self.response.out.write('<Response><Sms>Got it!</Sms></Response>')
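
The handler above depends on a parse_body helper that is not included in the listing. A minimal sketch, assuming the SMS body carries one "query: value" pair per line (the real parser may accept other separators):

  # Hypothetical sketch -- the real parse_body is not shown here.
  # Assumes one "query: value" pair per line of the SMS body; a missing
  # colon produces an empty value, which the caller above rejects.
  def parse_body(self, body):
    pairs = {}
    for line in body.splitlines():
      name, _, value = line.partition(':')
      pairs[name.strip()] = value.strip()
    return pairs
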
  def receive(self, mail_message):
    # we need to make a new data point

    # get the user from the sender field
    user_email = mail_message.sender[ mail_message.sender.find('<') + 1 : mail_message.sender.rfind('>') ]
    user = User.get_by_email(user_email)

    if is_admin(user) and mail_message.to.find('users@') != -1:
      forward_to_users(mail_message)
      return

    if mail_message.to.find('feedback@') != -1:
      forward_to_admins(mail_message)
      return

    # get the datapoint from the body
    data = ''
    query_name = mail_message.subject[ mail_message.subject.rfind("Re:") + 4: ]
  
    query = Query.get_by_user_and_name(user, query_name)

    for content_type, body in mail_message.bodies('text/html'):
      # basically deprecated, since we're having users submit forms
      # straight from the emails now.
      decoded = body.decode()
      logging.info('Message Body: ' + decoded)
      data = decoded[:decoded.find('<')]

    if data is None:
      data = ''

    # get the time from now()
    timestamp = datetime.now()

    log_str = "Want to create a new datapoint for user %s and with value %s and query name %s and query %s and timestamp %s" % (user_email, data, query_name, query.key(), timestamp)

    logging.info("Received a message from " + mail_message.sender)
    logging.info(log_str)

    # create and put the datapoint
    # dp = DataPoint(...)
    dp = DataPoint( 
      text = data,
      query = query,
      timestamp = timestamp)
      
    dp.lt_put()
    ActionLog.log('ReceivedEmail')
    ActionLog.log('NewDatapoint', data=query.name, user=user)

    query.refresh()
def correlation_coefficient(aquery, bquery):
  cov = covariance(aquery, bquery)
  
  adatapoints = DataPoint.get_by_query(aquery)
  bdatapoints = DataPoint.get_by_query(bquery)

  sdA = standard_deviation(adatapoints)
  sdB = standard_deviation(bdatapoints)

  if cov is None or sdA is None or sdB is None:
    return None
  
  cc = cov / (sdA * sdB)

  return cc
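
correlation_coefficient leans on a standard_deviation helper that is not part of this listing. A minimal sketch, assuming each DataPoint keeps its numeric value as a string in text and using the sample (N-1) form to match the covariance helper elsewhere in these examples:

import math

# Hypothetical sketch -- the real standard_deviation is not shown here.
# Assumes each DataPoint stores its numeric value as a string in .text.
def standard_deviation(datapoints):
  values = [float(dp.text) for dp in datapoints]
  n = len(values)
  if n <= 1:
    return None
  mean = sum(values) / n
  variance = sum((v - mean) ** 2 for v in values) / (n - 1)
  return math.sqrt(variance)
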
  def refresh_datapoints(self, queries):
    # weird thing here.
    # if DataPoint.get_by_query cache-misses, we are actually refreshing
    # this twice in a row. If it cache-hits, we only refresh it once (which
    # is what we want)

    # maybe this should be inside model.py, but I don't think an extra
    # few memcache puts every day are going to kill us right now.

    # for each metric
    #   get all datapoints in json
    #   put into memcache
    for query in queries:
      datapoints = DataPoint.get_by_query(query)
      json_dps =  DataPoint.JsonFromArray(datapoints)

      mck_metric_datapoints = str(query.key()) + '.datapoints'
      mck_metric_datapoints_last_update = str(query.key()) + '.datapoints-last-update'

      memcache.set(
        key=mck_metric_datapoints,
        value=json_dps,
      )

      memcache.set(
        key=mck_metric_datapoints_last_update,
        value=datetime.now().strftime('%s'),
      )
    
      logging.info("Refreshed datapoints for metric: " + str(query.key()))
def crosssection_suite(query):
  crosssection_list = []
  user = query.user

  for q in Query.get_by_user(user):
    if q.format == 'number' and q.name != query.name:
      for x in range(query_range(q)[0], query_range(q)[1]):
        avg_name = 'Average when ' + q.name + ' = ' + str(x)
        avg_value = float_str_format(avg_int_on_sliced_int(query, q, x))
  
        crosssection_list.append((avg_name, avg_value))

        percent_name = 'Change from average when ' + q.name + ' = ' + str(x)
        percent_value = float_str_format(percent_from_avg_int_on_sliced_int(query, q, x))

        crosssection_list.append((percent_name, percent_value))

    elif q.format == 'text':
      for word in common_words(DataPoint.get_by_query(q)).split(', '):
        avg_name = 'Average when "' + word + '" is in ' + q.name
        avg_value = float_str_format(avg_int_on_sliced_text(query, q, word))

        crosssection_list.append((avg_name, avg_value))

        #percent_name = 'Change from average when ' + word + ' in ' +q.name
        #percent_value = float_str_format(percent_from_avg_int_on_sliced_int(query, q, x))

        #crosssection_list[percent_name] = percent_value
        
  return crosssection_list
def analyze_text_query_data(query):
  datapoints = DataPoint.get_by_query(query)
  analytic_list = []

  analytic_list.extend(basic_suite(datapoints))

  return analytic_list
Example #9
  def query_to_table_row(self, query):
    metric_html = open('ui/html/metric.html').read()

    current_value = ''
    try:
      current_value = DataPoint.get_by_query_most_recent(query)[0].text
    except IndexError:
      current_value = 'None'

    metric_overview = self.get_overview(query)

    metric_data = {
      'query_id': query.key(), 
      'name': query.name, 
      'text': query.text, 
      'format': query.format, 
      'frequency': self.frequency_minutes_to_text(query.frequency),
      'lastsentat': query.lastSentAt,
      'freq_minutes': query.frequency,
      'current_value': current_value,
      'overview': metric_overview,
      'ask_when': json.dumps(query.ask_when),
    }

    return metric_html % metric_data
def avg_int_on_sliced_text(aquery, bquery, value):
  adatapoints = DataPoint.get_by_query(aquery)
  bdatapoints = DataPoint.get_by_query(bquery)
  adata = mapize_int_data(adatapoints)
  bdata = mapize_data(bdatapoints) # not int data!

  # bucket to days
  adata = bucket_to_days(adata)
  bdata = bucket_to_days(bdata)

  # throw out all the datapoints that aren't 'value'
  bdata = text_cross_section(bdata, value)

  adata, bdata = symmettrysize(adata, bdata)   

  avg = map_data_average(adata)

  return avg
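
The slicing helpers used above are not shown in the listing. Minimal sketches, assuming the data is a plain dict keyed by day once bucket_to_days has run (the real implementations may differ):

# Hypothetical sketches -- the real helpers are not shown here.
def text_cross_section(data, word):
  # keep only the days whose text contains the given word
  return dict((day, text) for day, text in data.items() if word in text)

def symmettrysize(adata, bdata):
  # restrict both maps to the days they share, so they line up 1:1
  shared = set(adata) & set(bdata)
  return (dict((day, adata[day]) for day in shared),
          dict((day, bdata[day]) for day in shared))
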
def covariance(int_query_a, int_query_b):
  # cov = sum for all i (x - xnaught)(y-ynaught) all over N-1
  # we need to match up points between the two datasets

  adatapoints = DataPoint.get_by_query(int_query_a)
  bdatapoints = DataPoint.get_by_query(int_query_b)

  adata = mapize_int_data(adatapoints)
  bdata = mapize_int_data(bdatapoints)

  #if adata is None:
  #  print int_query_a.name + " is none!"

  # tweak the data so we only have a single point for each day 
  # this can return just index: data, since we don't care about the actual
  # times
  adata = bucket_to_days(adata)
  bdata = bucket_to_days(bdata)

  # tweak the data such that there is a 1:1 mapping between the sets
  adata, bdata = symmettrysize(adata, bdata)


  # key it from 0...

  # do the actual covariance
  N = len(adata)

  if N <= 1: # we divide by N-1 just below
    return None

  aAvg = map_data_average(adata)
  bAvg = map_data_average(bdata)

  total = 0.0
  for i in adata.keys():
    total += (adata[i] - aAvg)*(bdata[i] - bAvg)

  # N >= 2 here, so dividing by N-1 is safe
  cov = total/(N-1)
 
  return cov
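
bucket_to_days appears throughout these helpers but is not listed. A minimal sketch for the integer case, assuming the mapized data is keyed by datetime and that multiple points on the same day are averaged (the text path would need a different merge rule):

# Hypothetical sketch -- the real bucket_to_days is not shown here.
# Collapses a datetime-keyed map into one averaged value per calendar day.
def bucket_to_days(data):
  days = {}
  for ts, value in data.items():
    days.setdefault(ts.date(), []).append(value)
  return dict((day, sum(values) / float(len(values)))
              for day, values in days.items())
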
Example #12
  def get(self):
    user = self.get_user()
    if not user:
      return
      
    query_id = self.request.get('query_id')
    query = Query.get_by_id(query_id)
   
    datapoints = DataPoint.get_by_query(query)
    
    frequencies = common_word_frequencies(datapoints)

    self.response.out.write('[' + json.dumps(frequencies) + ']')
def avg_int_on_sliced_int(aquery, bquery, value):
  adatapoints = DataPoint.get_by_query(aquery)
  bdatapoints = DataPoint.get_by_query(bquery)
  adata = mapize_int_data(adatapoints)
  bdata = mapize_int_data(bdatapoints)

  # bucket both metrics to days
  bdata = bucket_to_days(bdata)
  adata = bucket_to_days(adata)

  # throw out all the bdata points that don't equal 'value'
  bdata = integer_cross_section(bdata, value)

  adata, bdata = symmettrysize(adata, bdata)

  if len(adata) == 0:
    return 0

  # average the adata values
  avg = map_data_average(adata)

  # return it
  return avg
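
integer_cross_section is the numeric counterpart of the text slice sketched earlier; a minimal sketch under the same day-keyed assumption:

# Hypothetical sketch -- the real integer_cross_section is not shown here.
def integer_cross_section(data, value):
  # keep only the days whose bucketed value equals the slice value
  return dict((day, v) for day, v in data.items() if v == value)
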
def analyze_integer_query_data(query):
  datapoints = DataPoint.get_by_query(query)

  analytic_list = []
  # basics
  analytic_list.extend(basic_suite(datapoints))
  # daily basics
  analytic_list.extend(daily_suite(datapoints))

  analytic_list.extend(covariance_suite(query))

  analytic_list.extend(correlation_suite(query))

  analytic_list.extend(crosssection_suite(query))

  return analytic_list
Example #15
  def get(self):
    user = self.get_user()
    if not user:
      return

    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')

    user = User.get_by_email(user_email)
    query = db.get(query_id) #hmmm

    datapoints = []

    for datapoint in DataPoint.get_by_query(query):
      datapoints.append(datapoint.to_dict())

    self.response.out.write(json.dumps(datapoints))
Example #16
  def get(self):
    user = self.get_user()
    if not user:
      return

    user_email = self.request.get('user_email')
    
    user = User.get_by_email(user_email)
    queries = Query.get_by_user(user)

    datapoints = []

    for query in queries:
      for datapoint in DataPoint.get_by_query(query):
        datapoints.append(datapoint.to_dict())

    self.response.out.write(json.dumps(datapoints))
Example #17
  def get(self):
    user = self.get_user()
    if not user:
      return

    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')

    query = Query.get_by_id(query_id)

    datapoints = DataPoint.get_by_query(query)
    
    csv_data = ''
    
    for dp in datapoints:
      csv_data += self.dp_to_csv(dp)

    self.response.out.write(csv_data)
Example #18
  def query_data_from_db(self, query):
    query_template = open('ui/html/metric_data.html').read()

    rows =  ''
    # get all datapoints associated with the query
    datapoints = DataPoint.get_by_query(query)
    # for each datapoint from the query
    #   append data_point_to_row(dp)
    for dp in datapoints:
      rows += self.data_point_to_row(dp)
  
    params =  {
      'rows': rows, 
      'name': query.name, 
      'query_id': query.key()
    }

    return query_template % params
  def refresh_most_recent_dp(self, queries):
    for query in queries:
      most_recent_dp = DataPoint.get_by_query_most_recent(query)
      
      mck_most_recent_dp = str(query.key()) + '.most-recent-dp'
      mck_most_recent_dp_update = str(query.key()) + '.most-recent-dp-update'

      memcache.set(
        key=mck_most_recent_dp,
        value=most_recent_dp,
      )

      memcache.set(
        key=mck_most_recent_dp_update,  
        value=datetime.now().strftime('%s'),
      )

      logging.info('Updated Most Recent Datapoint for metric: ' + str(query.key()))
Example #20
def format_twitter_status(status):
	"""Formats tweets into the correct format for delivery to the client
	"""
	# if status['text'].startswith('RT'):
	# 		return None
	
	if not status['geo']:
		return None
	
	print "this works"
	tweet = DataPoint()
	tweet.content = status['text']
	tweet.latitude = status['geo']['coordinates'][0]
	tweet.longitude = status['geo']['coordinates'][1]
	tweet.time = datetime.strptime(status['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
	tweet.tags = map(lambda x: x['text'], status['entities']['hashtags'])
	print "more stuff"
	return tweet.json()
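
An illustrative call with a made-up status payload; the field names follow the Twitter status JSON the function reads, and the values are invented for the example:

# Hypothetical usage -- the payload below is fabricated for illustration.
sample_status = {
  'text': 'Hello from the lakefront',
  'geo': {'coordinates': [43.45, -80.49]},
  'created_at': 'Sat Mar 10 14:12:05 +0000 2012',
  'entities': {'hashtags': [{'text': 'example'}]},
}
tweet_json = format_twitter_status(sample_status)  # JSON string, or None if no geo
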
Example #21
  def post(self):
    user = self.get_user()
    if not user:
      return

    query_id = self.request.get('query_id')

    query = db.get(query_id)
    if not query:
      self.response.out.write('failure!')
      return
   
    # delete all the datapoints associated with the query as well.
    datapoints = DataPoint.get_by_query(query)

    for dp in datapoints:
      dp.delete()

    # finally, delete the query 
    query.delete()
def integer_overview(query):
  datapoints = DataPoint.get_by_query(query)
  return 'Average: ' + float_str_format(average(datapoints))
def query_range(query):
  datapoints = DataPoint.get_by_query(query)
  return data_range(datapoints)
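
data_range is not shown in the listing. A minimal sketch, assuming integer-formatted datapoints stored as strings in text and a (min, max) pair as the result:

# Hypothetical sketch -- the real data_range is not shown here.
# Assumes integer-formatted datapoints stored as strings in .text.
def data_range(datapoints):
  values = [int(dp.text) for dp in datapoints]
  if not values:
    return (0, 0)
  return (min(values), max(values))
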
Example #24
def query_average(query):
  datapoints = DataPoint.get_by_query(query)
  return average(datapoints)
def time_overview(query):
  datapoints = DataPoint.get_by_query(query)
  return 'Average Time: ' + str(average_time(datapoints))
from datetime import datetime
from model import DataPoint

def format_twitter_status(status):
	"""Formats tweets into the correct format for delivery to the client
	"""

	if not status['geo']:
		return None

	tweet = DataPoint()
	tweet.content = status['text']
	tweet.latitude = status['geo']['coordinates'][0]
	tweet.longitude = status['geo']['coordinates'][1]
	tweet.time = datetime.strptime(status['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
	tweet.tags = map(lambda x: x['text'], status['entities']['hashtags'])

	return tweet.json()
Example #27
  def get(self):
    user = self.get_user()
    if not user:
      return

    logout_url = users.create_logout_url(self.request.uri)
    
    yesterday = datetime.now() - timedelta(hours=24)

    new_datapoints = ActionLog.get(
      action = 'NewDatapoint',
      timewindow = yesterday,
    ).count()

    new_metrics = ActionLog.get(
      action = 'NewMetric', 
      timewindow = yesterday,
    ).count()

    queries_sent = ActionLog.get(
      action = 'SentQuery',
      timewindow = yesterday,
    ).count()

    sms_sent = ActionLog.get(
      action = 'SentSMS',
      timewindow = yesterday,
    ).count()

    emails_sent = ActionLog.get(
      action = 'SentEmail',
      timewindow = yesterday,
    ).count()

    emails_received = ActionLog.get(
      action = 'ReceivedEmail',
      timewindow = yesterday,
    ).count()

    sms_received = ActionLog.get(
      action = 'ReceivedSMS',
      timewindow = yesterday,
    ).count()

    new_logins = ActionLog.get(
      action = 'FirstTimeLogin',
      timewindow = yesterday,
    ).count()

    dashboard_avg_walltime = average_walltime('/dashboard')
    data_avg_walltime = average_walltime('/data')
    home_avg_walltime = average_walltime('/')
    analyze_avg_walltime = average_walltime('/analyze')
    analyze_json_avg_walltime = average_walltime('/analyzeJSON')

    dashboard_worst_walltime = worst_walltime('/dashboard')
    data_worst_walltime = worst_walltime('/data')
    home_worst_walltime = worst_walltime('/')
    analyze_worst_walltime = worst_walltime('/analyze')
    analyze_json_worst_walltime = worst_walltime('/analyzeJSON')

    # hacky high number for now.
    total_metrics = Query.all().count(100000)
    total_datapoints = DataPoint.all().count(100000)

    f = open('intern/html/engagement.html')
    html = f.read()

    params = {
      'new_datapoints': new_datapoints,
      'new_metrics': new_metrics,
      'total_metrics': total_metrics,
      'total_datapoints': total_datapoints,
      'queries_sent': queries_sent,
      'sms_sent': sms_sent,
      'emails_sent': emails_sent,
      'sms_received': sms_received,
      'emails_received': emails_received,
      'new_logins': new_logins,
      'dashboard_walltime': dashboard_avg_walltime,
      'data_walltime': data_avg_walltime,
      'home_walltime': home_avg_walltime,
      'analyze_json_walltime': analyze_json_avg_walltime,
      'analyze_walltime': analyze_avg_walltime,
      'dashboard_worst_walltime': dashboard_worst_walltime,
      'data_worst_walltime': data_worst_walltime,
      'home_worst_walltime': home_worst_walltime,
      'analyze_json_worst_walltime': analyze_json_worst_walltime,
      'analyze_worst_walltime': analyze_worst_walltime,
    }

    self.response.out.write(html % params)
def text_overview(query):
  datapoints = DataPoint.get_by_query(query)
  return 'Common Words: ' + common_words(datapoints)