def correlation_coefficient(aquery, bquery):
  """Return the correlation coefficient between the data of two queries.

  The value is covariance(aquery, bquery) divided by the product of the
  standard deviations of each query's datapoints.

  Returns None when the covariance or either standard deviation is None,
  and also when the product of the standard deviations is zero: the
  coefficient is undefined there and the division would otherwise raise
  ZeroDivisionError.
  """
  cov = covariance(aquery, bquery)
  if cov is None:
    # nothing to normalise, so skip the datapoint lookups entirely
    return None

  sdA = standard_deviation(DataPoint.get_by_query(aquery))
  sdB = standard_deviation(DataPoint.get_by_query(bquery))
  if sdA is None or sdB is None:
    return None

  denominator = sdA * sdB
  if denominator == 0:
    # a series with no spread has no defined correlation
    return None

  return cov / denominator
def crosssection_suite(query):
  """Build cross-section analytics of `query` against its user's other queries.

  Walks every query returned by Query.get_by_user(query.user):

    * format 'number' (and a different name than `query`): for each x in
      range(lo, hi), where lo/hi are the first two items of query_range(q),
      adds an 'Average when <name> = <x>' entry and a
      'Change from average when <name> = <x>' entry.
    * format 'text': for each word from common_words(...).split(', '),
      adds an 'Average when "<word>" is in <name>' entry.

  Returns a list of (label, formatted value) tuples, in the order generated.
  """
  crosssection_list = []

  for q in Query.get_by_user(query.user):
    if q.format == 'number' and q.name != query.name:
      # look the bounds up once; every query_range call re-fetches datapoints
      bounds = query_range(q)

      # NOTE(review): range() stops before bounds[1]; if query_range returns
      # an inclusive (min, max) pair the top value is never sliced - confirm.
      for x in range(bounds[0], bounds[1]):
        condition = q.name + ' = ' + str(x)

        crosssection_list.append((
          'Average when ' + condition,
          float_str_format(avg_int_on_sliced_int(query, q, x)),
        ))
        crosssection_list.append((
          'Change from average when ' + condition,
          float_str_format(percent_from_avg_int_on_sliced_int(query, q, x)),
        ))

    elif q.format == 'text':
      for word in common_words(DataPoint.get_by_query(q)).split(', '):
        if not word:
          # ''.split(', ') yields [''], which would produce a meaningless
          # 'Average when "" is in ...' row
          continue

        crosssection_list.append((
          'Average when "' + word + '" is in ' + q.name,
          float_str_format(avg_int_on_sliced_text(query, q, word)),
        ))

        # TODO: no 'Change from average' entry is produced for text slices;
        # doing so needs a text counterpart of
        # percent_from_avg_int_on_sliced_int.

  return crosssection_list
def analyze_text_query_data(query):
  """Return the list of analytics for a text query.

  At present this is only the output of basic_suite over the query's
  datapoints, copied into a fresh list.
  """
  return list(basic_suite(DataPoint.get_by_query(query)))
  def refresh_datapoints(self, queries):
    """Re-cache the JSON form of the datapoints of each query in `queries`.

    Two memcache entries are written per query, both prefixed with
    str(query.key()):

      <key>.datapoints              output of DataPoint.JsonFromArray
      <key>.datapoints-last-update  datetime.now().strftime('%s')

    Known quirk, noted by the original author: when DataPoint.get_by_query
    cache-misses the data is effectively refreshed twice in a row; on a
    cache hit it is refreshed once, which is the intended behaviour. This
    could arguably live in model.py, but a few extra memcache puts a day
    were judged harmless.
    """
    for query in queries:
      json_dps = DataPoint.JsonFromArray(DataPoint.get_by_query(query))
      cache_prefix = str(query.key())

      memcache.set(key=cache_prefix + '.datapoints', value=json_dps)
      memcache.set(
        key=cache_prefix + '.datapoints-last-update',
        value=datetime.now().strftime('%s'),
      )

      logging.info("Refreshed datapoints for metric: " + cache_prefix)
def avg_int_on_sliced_text(aquery, bquery, value):
  """Average aquery's integer data over the slice where bquery matches `value`.

  Both series are bucketed to days, bquery's text data is reduced with
  text_cross_section(..., value), and the two are passed through
  symmettrysize before aquery's remaining data is averaged with
  map_data_average.
  """
  a_points = DataPoint.get_by_query(aquery)
  b_points = DataPoint.get_by_query(bquery)

  # b holds text, so it goes through the generic mapper rather than the int one
  a_series = bucket_to_days(mapize_int_data(a_points))
  b_series = bucket_to_days(mapize_data(b_points))

  # keep only the b entries selected by `value`, then line the two sets up
  b_slice = text_cross_section(b_series, value)
  a_series, b_slice = symmettrysize(a_series, b_slice)

  return map_data_average(a_series)
def covariance(int_query_a, int_query_b):
  """Return the sample covariance of two integer queries, or None.

  cov = sum over i of (a[i] - mean(a)) * (b[i] - mean(b)), all over N - 1

  Each series is first reduced to a single point per day (bucket_to_days)
  and the pair is then passed through symmettrysize so that there is a 1:1
  mapping between the two sets before the sum is taken.

  Returns None when fewer than two matched points remain, since the N - 1
  denominator would not be positive.
  """
  adatapoints = DataPoint.get_by_query(int_query_a)
  bdatapoints = DataPoint.get_by_query(int_query_b)

  adata = mapize_int_data(adatapoints)
  bdata = mapize_int_data(bdatapoints)

  # one point per day; the actual times do not matter from here on
  adata = bucket_to_days(adata)
  bdata = bucket_to_days(bdata)

  # make the two sets map 1:1 onto each other
  adata, bdata = symmettrysize(adata, bdata)

  N = len(adata)
  if N <= 1:  # we divide by N - 1 just below
    return None

  aAvg = map_data_average(adata)
  bAvg = map_data_average(bdata)

  # named `total` so the builtin sum() is not shadowed
  total = 0.0
  for i in adata:
    total += (adata[i] - aAvg) * (bdata[i] - bAvg)

  return total / (N - 1)
Exemple #7
0
  def get(self):
    """Write the common-word frequencies of the requested query.

    Reads 'query_id' from the request and writes json.dumps of
    common_word_frequencies(...) wrapped in '[' and ']'. Writes nothing
    when get_user() returns a falsy value.
    """
    if not self.get_user():
      return

    query = Query.get_by_id(self.request.get('query_id'))
    frequencies = common_word_frequencies(DataPoint.get_by_query(query))

    self.response.out.write('[' + json.dumps(frequencies) + ']')
def avg_int_on_sliced_int(aquery, bquery, value):
  """Average aquery's data over the slice where bquery matches `value`.

  Both series are bucketed to days, bquery's data is reduced with
  integer_cross_section(..., value), and the two sets are then matched up
  with symmettrysize so that only aquery's points that pair with the
  selected bquery points are averaged.

  Returns 0 when nothing is left to average, otherwise the result of
  map_data_average over the remaining aquery data.
  """
  adatapoints = DataPoint.get_by_query(aquery)
  bdatapoints = DataPoint.get_by_query(bquery)
  adata = mapize_int_data(adatapoints)
  bdata = mapize_int_data(bdatapoints)

  # one point per day for each series
  bdata = bucket_to_days(bdata)
  adata = bucket_to_days(adata)

  # throw out every b point that is not `value`
  bdata = integer_cross_section(bdata, value)

  # The matched sets are the return value (as at the other call sites of
  # symmettrysize); discarding it left adata unsliced, so the average was
  # taken over all of aquery's data.
  adata, bdata = symmettrysize(adata, bdata)

  if not adata:
    return 0

  return map_data_average(adata)
def analyze_integer_query_data(query):
  """Return the full list of analytics for an integer query.

  The result is the concatenation, in this order, of: basic_suite and
  daily_suite over the query's datapoints, then covariance_suite,
  correlation_suite and crosssection_suite over the query itself.
  """
  datapoints = DataPoint.get_by_query(query)

  # datapoint-level statistics
  analytic_list = list(basic_suite(datapoints))
  analytic_list += daily_suite(datapoints)

  # statistics relating this query to others
  analytic_list += covariance_suite(query)
  analytic_list += correlation_suite(query)
  analytic_list += crosssection_suite(query)

  return analytic_list
Exemple #10
0
  def get(self):
    """Write the requested query's datapoints as a JSON array of dicts.

    Reads 'user_email' and 'query_id' from the request; the query is loaded
    with db.get(query_id). Writes nothing when get_user() returns a falsy
    value.
    """
    if not self.get_user():
      return

    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')

    # NOTE(review): the user looked up here is not used by anything below.
    user = User.get_by_email(user_email)
    query = db.get(query_id) #hmmm

    payload = [dp.to_dict() for dp in DataPoint.get_by_query(query)]

    self.response.out.write(json.dumps(payload))
Exemple #11
0
  def get(self):
    """Write all datapoints of all of a user's queries as a JSON array.

    The user is the one found by User.get_by_email for the 'user_email'
    request parameter. Writes nothing when get_user() returns a falsy
    value.
    """
    if not self.get_user():
      return

    # NOTE(review): data is served for the user named by 'user_email', not
    # for the user returned by get_user() - confirm this is intended.
    owner = User.get_by_email(self.request.get('user_email'))

    payload = [
      dp.to_dict()
      for query in Query.get_by_user(owner)
      for dp in DataPoint.get_by_query(query)
    ]

    self.response.out.write(json.dumps(payload))
Exemple #12
0
  def get(self):
    """Write the requested query's datapoints in CSV form.

    Reads 'query_id' from the request and writes the concatenation of
    self.dp_to_csv(dp) for each datapoint of that query. Writes nothing
    when get_user() returns a falsy value.
    """
    if not self.get_user():
      return

    query = Query.get_by_id(self.request.get('query_id'))
    datapoints = DataPoint.get_by_query(query)

    # join once rather than growing a string with += for every datapoint
    csv_data = ''.join(self.dp_to_csv(dp) for dp in datapoints)

    self.response.out.write(csv_data)
Exemple #13
0
  def query_data_from_db(self, query):
    """Render the metric data page for `query` and return it as a string.

    The 'ui/html/metric_data.html' template is filled, via %-formatting with
    a dict, with:
      rows      concatenation of self.data_point_to_row(dp) per datapoint
      name      query.name
      query_id  query.key()
    """
    # close the template deterministically instead of leaking the handle
    with open('ui/html/metric_data.html') as template_file:
      query_template = template_file.read()

    # one join instead of repeated string concatenation
    rows = ''.join(
      self.data_point_to_row(dp) for dp in DataPoint.get_by_query(query)
    )

    params = {
      'rows': rows,
      'name': query.name,
      'query_id': query.key(),
    }

    return query_template % params
Exemple #14
0
  def post(self):
    """Delete a query together with every datapoint that belongs to it.

    Reads 'query_id' from the request. Writes 'failure!' when db.get returns
    nothing for that id; on success nothing is written. Does nothing when
    get_user() returns a falsy value.
    """
    if not self.get_user():
      return

    query = db.get(self.request.get('query_id'))
    if not query:
      self.response.out.write('failure!')
      return

    # the datapoints go first, then the query they were recorded for
    for datapoint in DataPoint.get_by_query(query):
      datapoint.delete()

    query.delete()
def text_overview(query):
  """Return a 'Common Words: ...' summary line for the query's datapoints."""
  return 'Common Words: ' + common_words(DataPoint.get_by_query(query))
def time_overview(query):
  """Return an 'Average Time: ...' summary line for the query's datapoints."""
  avg = average_time(DataPoint.get_by_query(query))
  return 'Average Time: ' + str(avg)
Exemple #17
0
def query_average(query):
  """Return average() applied to the datapoints of `query`."""
  return average(DataPoint.get_by_query(query))
def integer_overview(query):
  """Return an 'Average: ...' summary line for the query's datapoints."""
  avg = average(DataPoint.get_by_query(query))
  return 'Average: ' + float_str_format(avg)
def query_range(query):
  """Return data_range() applied to the datapoints of `query`."""
  return data_range(DataPoint.get_by_query(query))