Esempio n. 1
0
def wordSummary(db, table):
	"""Write per-word reaction-time statistics for ``table`` to a text file.

	A header plus one comma-separated line per distinct ``word`` is written
	to ``wordSummary_<table>.txt`` in the current directory.  Each line holds
	the word, its length, the mean and standard deviation of ``RT`` over the
	rows with ``incorrect = 0`` (as computed by ``stats.mean`` and
	``stats.samplestdev``), the total number of rows for the word, and the
	percentage of those rows that have ``incorrect = 0``.  Every data line is
	also echoed to stdout.

	When a word has no row with ``incorrect = 0`` the two RT columns are
	written as ``NA``.

	Args:
		db: object whose ``query(sql)`` returns an indexable sequence of
			row tuples.
		table: table name.  It is interpolated verbatim into the SQL text
			and the output file name, so it must come from a trusted source.
	"""
	# The context manager closes the file even when a query or a stats call
	# raises part-way through the export.
	with open("wordSummary_%s.txt" % table, 'w') as f:
		f.write("word, length, rtAVG, rtSTD, total, percCorrect\n")
		wordList = [row[0] for row in db.query("SELECT DISTINCT(word) FROM %s" % table)]

		for word in wordList:
			# Double embedded single quotes so a word such as "don't" cannot
			# terminate the SQL string literal early.
			quoted = word.replace("'", "''")

			sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table, quoted)
			rtList = [row[0] for row in db.query(sql)]

			total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" % (table, quoted))[0][0]
			percCorrect = float(len(rtList)) / float(total) * 100.0

			if rtList:
				rtAVG = "%f" % stats.mean(rtList)
				rtSTD = "%f" % stats.samplestdev(rtList)
			else:
				# Nothing to average: never hand an empty list to the stats
				# helpers, report the columns as missing instead.
				rtAVG = rtSTD = "NA"

			# Single-argument print(...) produces the same output whether
			# print is a statement or a function.
			print("%s %s" % (len(rtList), total))

			myString = "%s, %i, %s, %s, %i, %f\n" % (word, len(word), rtAVG, rtSTD, total, percCorrect)
			print(myString)
			f.write(myString)
Esempio n. 2
0
def wordSummary(db, table):
    """Write per-word reaction-time statistics for ``table`` to a text file.

    A header plus one comma-separated line per distinct ``word`` is written
    to ``wordSummary_<table>.txt`` in the current directory.  Each line holds
    the word, its length, the mean and standard deviation of ``RT`` over the
    rows with ``incorrect = 0`` (as computed by ``stats.mean`` and
    ``stats.samplestdev``), the total number of rows for the word, and the
    percentage of those rows that have ``incorrect = 0``.  Every data line is
    also echoed to stdout.

    When a word has no row with ``incorrect = 0`` the two RT columns are
    written as ``NA``.

    Args:
        db: object whose ``query(sql)`` returns an indexable sequence of
            row tuples.
        table: table name.  It is interpolated verbatim into the SQL text
            and the output file name, so it must come from a trusted source.
    """
    # The context manager closes the file even when a query or a stats call
    # raises part-way through the export.
    with open("wordSummary_%s.txt" % table, 'w') as f:
        f.write("word, length, rtAVG, rtSTD, total, percCorrect\n")
        wordList = [
            row[0]
            for row in db.query("SELECT DISTINCT(word) FROM %s" % table)
        ]

        for word in wordList:
            # Double embedded single quotes so a word such as "don't" cannot
            # terminate the SQL string literal early.
            quoted = word.replace("'", "''")

            sql = ("SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" %
                   (table, quoted))
            rtList = [row[0] for row in db.query(sql)]

            total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" %
                             (table, quoted))[0][0]
            percCorrect = float(len(rtList)) / float(total) * 100.0

            if rtList:
                rtAVG = "%f" % stats.mean(rtList)
                rtSTD = "%f" % stats.samplestdev(rtList)
            else:
                # Nothing to average: never hand an empty list to the stats
                # helpers, report the columns as missing instead.
                rtAVG = rtSTD = "NA"

            # Single-argument print(...) produces the same output whether
            # print is a statement or a function.
            print("%s %s" % (len(rtList), total))

            myString = "%s, %i, %s, %s, %i, %f\n" % (
                word, len(word), rtAVG, rtSTD, total, percCorrect)
            print(myString)
            f.write(myString)
Esempio n. 3
0
def writeByCategory(db, table):
	"""Write one line of per-category RT statistics per subject.

	Output goes to ``<table>_by_category.txt`` in the current directory.
	Each comma-separated line contains, in order:

	* the subject id;
	* a sex code: 2 for ``"male"``, 1 for ``"female"``, ``NA`` for any
	  other value returned by ``db.getSex``;
	* an empty field (the historical layout has two consecutive commas
	  here; it is kept so existing readers of the file are unaffected);
	* for every category of that subject, in sorted order, the mean and
	  standard deviation (``stats.mean`` / ``stats.samplestdev``) of ``RT``
	  over trials with ``incorrect == 0`` and ``-3 <= zscore <= 3``, or
	  ``NA,NA`` when the category has no such trial;
	* the percentage of the subject's trials that have ``incorrect = 0``.

	Each category key is also echoed to stdout.

	Args:
		db: object providing ``getSubjects(table)``, ``getSex(subject,
			table)`` and ``query(sql)`` (indexable sequence of row tuples).
		table: table name, interpolated verbatim into SQL and the file name.
	"""
	sexCodes = {"male": 2, "female": 1}

	# The context manager closes the file even if a query or stats call fails.
	with open("%s_by_category.txt" % table, "w") as f:
		for subject in db.getSubjects(table):
			# An unrecognised sex used to leave the code unbound (first
			# subject) or silently reuse the previous subject's code.
			se = sexCodes.get(db.getSex(subject, table), "NA")

			total = db.query("SELECT COUNT(*) FROM %s WHERE subject = %s" % (table, subject))[0][0]
			# Count the correct trials directly.  Summing the ``incorrect``
			# column, as was done before, yields the number of errors, which
			# made the "percent correct" column an error rate.
			correct = db.query("SELECT COUNT(*) FROM %s WHERE subject = %s AND incorrect = 0" % (table, subject))[0][0]

			if total:
				percCorrect = float(correct) / float(total) * 100.0
			else:
				percCorrect = 0.0

			sql = "SELECT category, RT, zscore, incorrect FROM %s WHERE subject = %s" % (table, subject)
			d = {}
			for cat, RT, zscore, incorrect in db.query(sql):
				# Register every category, even if none of its trials qualify.
				rts = d.setdefault(cat, [])
				if incorrect == 0 and -3 <= zscore <= 3:
					rts.append(RT)

			# The third, empty field reproduces the historical ",," gap.
			fields = [str(subject), str(se), ""]
			for k in sorted(d):
				print(k)
				if d[k]:
					avg = stats.mean(d[k])
					std = stats.samplestdev(d[k])
				else:
					avg = "NA"
					std = "NA"
				fields.append(str(avg))
				fields.append(str(std))
			fields.append(str(percCorrect))

			f.write(",".join(fields) + "\n")
Esempio n. 4
0
def writeByCategory(db, table):
    """Write one line of per-category RT statistics per subject.

    Output goes to ``<table>_by_category.txt`` in the current directory.
    Each comma-separated line contains, in order:

    * the subject id;
    * a sex code: 2 for ``"male"``, 1 for ``"female"``, ``NA`` for any
      other value returned by ``db.getSex``;
    * an empty field (the historical layout has two consecutive commas
      here; it is kept so existing readers of the file are unaffected);
    * for every category of that subject, in sorted order, the mean and
      standard deviation (``stats.mean`` / ``stats.samplestdev``) of ``RT``
      over trials with ``incorrect == 0`` and ``-3 <= zscore <= 3``, or
      ``NA,NA`` when the category has no such trial;
    * the percentage of the subject's trials that have ``incorrect = 0``.

    Each category key is also echoed to stdout.

    Args:
        db: object providing ``getSubjects(table)``, ``getSex(subject,
            table)`` and ``query(sql)`` (indexable sequence of row tuples).
        table: table name, interpolated verbatim into SQL and the file name.
    """
    sexCodes = {"male": 2, "female": 1}

    # The context manager closes the file even if a query or stats call fails.
    with open("%s_by_category.txt" % table, "w") as f:
        for subject in db.getSubjects(table):
            # An unrecognised sex used to leave the code unbound (first
            # subject) or silently reuse the previous subject's code.
            se = sexCodes.get(db.getSex(subject, table), "NA")

            total = db.query("SELECT COUNT(*) FROM %s WHERE subject = %s" %
                             (table, subject))[0][0]
            # Count the correct trials directly.  Summing the ``incorrect``
            # column, as was done before, yields the number of errors, which
            # made the "percent correct" column an error rate.
            correct = db.query(
                "SELECT COUNT(*) FROM %s WHERE subject = %s AND incorrect = 0"
                % (table, subject))[0][0]

            if total:
                percCorrect = float(correct) / float(total) * 100.0
            else:
                percCorrect = 0.0

            sql = "SELECT category, RT, zscore, incorrect FROM %s WHERE subject = %s" % (
                table, subject)
            d = {}
            for cat, RT, zscore, incorrect in db.query(sql):
                # Register every category, even if none of its trials qualify.
                rts = d.setdefault(cat, [])
                if incorrect == 0 and -3 <= zscore <= 3:
                    rts.append(RT)

            # The third, empty field reproduces the historical ",," gap.
            fields = [str(subject), str(se), ""]
            for k in sorted(d):
                print(k)
                if d[k]:
                    avg = stats.mean(d[k])
                    std = stats.samplestdev(d[k])
                else:
                    avg = "NA"
                    std = "NA"
                fields.append(str(avg))
                fields.append(str(std))
            fields.append(str(percCorrect))

            f.write(",".join(fields) + "\n")
Esempio n. 5
0
def calculateZ(db, table):
	"""Store a per-subject z-score for every correct trial of ``table``.

	For each subject returned by ``db.getSubjects(table)`` the ``RT`` values
	of the rows whose ``incorrect`` flag is falsy are collected; their mean
	and standard deviation are taken from ``stats.mean`` and
	``stats.samplestdev``; and each of those rows gets
	``zscore = (RT - mean) / std`` written back through an ``UPDATE``
	addressed by ``ROWID``.  Rows with a truthy ``incorrect`` flag are left
	untouched.

	Edge cases:
		* a subject without any correct trial is skipped;
		* when the standard deviation is zero (for example a single correct
		  trial) every deviation from the mean is zero as well, so 0.0 is
		  stored instead of dividing by zero.

	Args:
		db: object providing ``getSubjects(table)`` and ``query(sql)``.
		table: table name, interpolated verbatim into the SQL text.
	"""
	for s in db.getSubjects(table):
		sql = "SELECT ROWID, RT, incorrect FROM %s WHERE subject = %s" % (table, s)
		# Only correct trials feed the statistics and receive a z-score.
		correct = [(rowid, RT) for rowid, RT, incorrect in db.query(sql) if not incorrect]
		if not correct:
			continue

		rts = [RT for _, RT in correct]
		avg = stats.mean(rts)
		std = stats.samplestdev(rts)

		for rowid, RT in correct:
			# Parenthesise the deviation: the previous ``RT - avg / std``
			# divided only the mean by the standard deviation.
			z = (RT - avg) / std if std else 0.0
			db.query("UPDATE %s SET zscore = %f WHERE ROWID = %s" % (table, z, rowid))
Esempio n. 6
0
def calculateZ(db, table):
    """Store a per-subject z-score for every correct trial of ``table``.

    For each subject returned by ``db.getSubjects(table)`` the ``RT`` values
    of the rows whose ``incorrect`` flag is falsy are collected; their mean
    and standard deviation are taken from ``stats.mean`` and
    ``stats.samplestdev``; and each of those rows gets
    ``zscore = (RT - mean) / std`` written back through an ``UPDATE``
    addressed by ``ROWID``.  Rows with a truthy ``incorrect`` flag are left
    untouched.

    Edge cases:
        * a subject without any correct trial is skipped;
        * when the standard deviation is zero (for example a single correct
          trial) every deviation from the mean is zero as well, so 0.0 is
          stored instead of dividing by zero.

    Args:
        db: object providing ``getSubjects(table)`` and ``query(sql)``.
        table: table name, interpolated verbatim into the SQL text.
    """
    for s in db.getSubjects(table):
        sql = "SELECT ROWID, RT, incorrect FROM %s WHERE subject = %s" % (
            table, s)
        # Only correct trials feed the statistics and receive a z-score.
        correct = [(rowid, RT)
                   for rowid, RT, incorrect in db.query(sql)
                   if not incorrect]
        if not correct:
            continue

        rts = [RT for _, RT in correct]
        avg = stats.mean(rts)
        std = stats.samplestdev(rts)

        for rowid, RT in correct:
            # Parenthesise the deviation: the previous ``RT - avg / std``
            # divided only the mean by the standard deviation.
            z = (RT - avg) / std if std else 0.0
            db.query("UPDATE %s SET zscore = %f WHERE ROWID = %s" %
                     (table, z, rowid))
Esempio n. 7
0
def make_subtrees_stddev(graph, ratio, distance, relabel=1, lab="cluster."):
   """Drop edges whose cost stands out from nearby lengths, then relabel.

   For every edge of ``graph`` a list of lengths is gathered with
   ``get_lengths(edge.from_node, distance, ...)`` (presumably filled in
   place by that helper; confirm against its definition).  One occurrence
   of the edge's own cost is removed from the list, and edges left with
   fewer than two lengths are ignored.  Otherwise the edge is scheduled for
   removal when ``stats.samplestdev([mean_of_lengths, edge.cost])`` exceeds
   ``ratio``.  Removal happens only after all edges have been examined.

   When ``relabel`` is truthy every node's object gets
   ``classify_manual("")``, and then each node whose ``get_main_id()`` is
   still ``""`` is passed to ``label(graph, node, lab, n)`` with ``n``
   counting up from 0.

   Returns:
      A list with the result of calling each node of ``graph.get_nodes()``.
   """
   import stats

   doomed = []
   for edge in graph.get_edges():
      nearby = []
      get_lengths(edge.from_node, distance, nearby, 0, {})
      # Raises ValueError if the helper did not record this edge's cost.
      nearby.remove(edge.cost)
      if len(nearby) <= 1:
         continue
      spread = stats.samplestdev([stats.mean(nearby), edge.cost])
      if spread > ratio:
         doomed.append(edge)

   # Edges are removed in a second pass, after the scan above is complete.
   for edge in doomed:
      graph.remove_edge(edge)

   if relabel:
      for node in graph.get_nodes():
         node().classify_manual("")
      next_id = 0
      for node in graph.get_nodes():
         if node().get_main_id() == "":
            label(graph, node, lab, next_id)
            next_id += 1

   return [node() for node in graph.get_nodes()]
Esempio n. 8
0
# NOTE: this is an excerpt of a longer flat script.  The names stats, N, l,
# lf, a and af are not bound in the lines visible here and must come from
# earlier in the script (N is presumably a numeric-array module -- confirm).

# relfreq is printed for each of the four fixtures l, lf, a and af.
print('relfreq:')
print(stats.relfreq(l))
print(stats.relfreq(lf))
print(stats.relfreq(a))
print(stats.relfreq(af))
print('\nVARIATION')
print('obrientransform:')
# Rebind the fixtures for this section: l becomes range(1, 21) and a is
# N.array(l).  ll is a list holding five references to l and aa is
# N.array(ll); neither ll nor aa is used in the lines visible here.
l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)

# obrientransform receives the same sequence five times, once as l and
# once as a.
print(stats.obrientransform(l,l,l,l,l))
print(stats.obrientransform(a,a,a,a,a))
# Each line below calls one stats function on l and on a and prints both
# results on the same line.
print('samplevar:',stats.samplevar(l),stats.samplevar(a))
print('samplestdev:',stats.samplestdev(l),stats.samplestdev(a))
print('var:',stats.var(l),stats.var(a))
print('stdev:',stats.stdev(l),stats.stdev(a))
print('sterr:',stats.sterr(l),stats.sterr(a))
print('sem:',stats.sem(l),stats.sem(a))
print('z:',stats.z(l,4),stats.z(a,4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))
print('\nTRIMMING')
print('trimboth:')
# trimboth is called with .2 as its second argument on l, lf, a and af
# (lf and af still refer to whatever was bound earlier in the script).
print(stats.trimboth(l,.2))
print(stats.trimboth(lf,.2))
print(stats.trimboth(a,.2))
print(stats.trimboth(af,.2))
# The 'trim1' section continues beyond this excerpt.
print('trim1:')