# review word length, average weighted score, maximum word score, minimum
# word score, and pure mathematical average.
#    From group result of the yelp_review_flat and selected category
# categories table: the categories user ever commented.
#
# Notice: guest database account will not have right to modify the database.

import psycopg2
import sys
import re
import scoresentence

TEST = False  # True if generating test data. False for training data.

# Script entry point.
# NOTE(review): this block appears to be two different scripts spliced
# together. Everything up to the `for uid in uid_set:` header sets up the
# database access and reads the categories and the distinct uids. The loop
# body and the final print, however, use names that are never assigned in
# this block (OUTTOFILE, out, bid, word_len, weighted_score, maximum,
# minimum, avg, total, score) -- as written it would fail with NameError on
# the first iteration. Confirm the intended loop body before running.
if __name__ == '__main__':
    # The value returned by dbConnect() is indexed here: element 0 is used as
    # the cursor and element 1 as the connection.
    connection = scoresentence.dbConnect()
    conn = connection[1]
    cursor = connection[0]
    # Collect the second column (index 1) of every row of `categories`.
    # `cat` is not read again in the visible code.
    cursor.execute("SELECT * FROM categories;")
    cat = []
    for item1 in cursor.fetchall():
        cat.append(item1[1])
    # Table-name suffix: "features_test" when TEST is set, else "features".
    if (TEST):
        end = "_test"
    else:
        end = ""

    cursor.execute("SELECT distinct(uid) FROM features" + end + ";")
    uid_set = cursor.fetchall()
    count = 0  # not updated or read again in the visible code
    for uid in uid_set:
        # NOTE(review): from here on the body does not use anything computed
        # above except `uid`; see the note at the top of this block.
        if OUTTOFILE:
            # Python 2 "print chevron": writes one comma-separated record to
            # the file-like object `out`.
            print >> out, str(uid) + ", " + str(bid) + ", " + str(word_len) + ", " + str(weighted_score) + ", " + str(
                maximum
            ) + ", " + str(minimum) + ", " + str(avg)
        else:
            print str(word_len) + ", " + str(weighted_score)
        # Running sum and running max/min of weighted_score.
        total = total + weighted_score
        if weighted_score > maximum:
            maximum = weighted_score
        if weighted_score < minimum:
            minimum = weighted_score
    # Summary: total divided by the length of score[4], plus the extremes.
    print "Average score is: " + str(total / len(score[4])) + "\nMaximum score is: " + str(
        maximum
    ) + "\nMinimum score is: " + str(minimum)


# Script entry point: derive the mean and standard deviation of the per-word
# star average from the `word_star` table, score the sentences with them, and
# print the result.
if __name__ == "__main__":
    # The value returned by dbConnect() is indexed here: element 0 is used as
    # the cursor and element 1 as the connection.
    connect = scoresentence.dbConnect()
    cursor = connect[0]
    conn = connect[1]
    try:
        st = LancasterStemmer()
        # One row with two aggregates: E[x] and E[x^2], where
        # x = star_sum / number for each row of word_star.
        cursor.execute("SELECT avg(star_sum/number), avg((star_sum / number) * (star_sum / number)) from word_star;")
        stat_data = cursor.fetchall()
        avg_score = stat_data[0][0]
        avg_score_square = stat_data[0][1]
        # Var[x] = E[x^2] - E[x]^2. Floating-point rounding can push this a
        # hair below zero when all values are (nearly) equal, and a negative
        # base with a fractional exponent raises ValueError on Python 2, so
        # clamp at zero before taking the square root.
        variance = float(avg_score_square) - float(avg_score) ** 2
        std_dev = max(variance, 0.0) ** 0.5
        score = scoreSentence(cursor, st, avg_score, std_dev)
        printscore(score, OUTTOFILE, END)
    finally:
        # Release the database resources even if scoring or printing fails.
        cursor.close()
        conn.close()
# Example #3
# 0
# This script inserts the output file generated by getDataDistribution.py
# into the database.
#
# Note: the guest database account does not have permission to modify the
# database.

import psycopg2
import sys
import re
import scoresentence

START = 1  # start file name
END = 2  # end file name

# Parameterized statement: values are passed separately to cursor.execute()
# so the driver quotes them, instead of splicing file content into the SQL.
INSERT_FEATURE_SQL = (
    "INSERT INTO features_test VALUES (%s, %s, %s, %s, %s, %s, %s);"
)


def parse_feature_row(line):
    """Turn one ", "-separated line of the score file into INSERT parameters.

    The first two fields are kept as strings (the original statement quoted
    them as text); the following five are converted to float (the original
    statement inserted them as bare numeric literals). Fields after the
    seventh are ignored, as before.

    Raises:
        ValueError: if the line has fewer than seven fields or one of the
            numeric fields is not a valid number.
    """
    fields = line.strip().split(", ")
    if len(fields) < 7:
        raise ValueError(
            "expected at least 7 fields, got %d: %r" % (len(fields), line)
        )
    return tuple(fields[:2]) + tuple(float(value) for value in fields[2:7])


# Script entry point: load every "scoredistributiontop<x>000.txt" file for x
# in range(START, END) into the features_test table, committing once at the
# end so a failure part-way through leaves the table untouched.
if __name__ == "__main__":
    # The value returned by dbConnect() is indexed here: element 0 is used as
    # the cursor and element 1 as the connection.
    ret = scoresentence.dbConnect()
    conn = ret[1]
    cursor = ret[0]
    try:
        for x in range(START, END):
            count = 0
            # `with` guarantees the input file is closed, even on error.
            with open("scoredistributiontop" + str(x) + "000.txt", 'r') as f:
                for line in f:
                    if not line.strip():
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    count = count + 1
                    print("Insert into the row " + str(count))
                    cursor.execute(INSERT_FEATURE_SQL, parse_feature_row(line))
        conn.commit()
    finally:
        # Closing without a commit discards any partial work.
        cursor.close()
        conn.close()
# review word length, average weighted score, maximum word score, minimum
# word score, and pure mathematical average.
#    From group result of the yelp_review_flat and selected category
# categories table: the categories user ever commented.
#
# Notice: guest database account will not have right to modify the database.

import psycopg2
import sys
import re
import scoresentence

TEST = False # True if generating test data. False for training data.

if __name__ == '__main__':
    connection = scoresentence.dbConnect()
    conn = connection[1]
    cursor = connection[0]
    cursor.execute("SELECT * FROM categories;")
    cat = []
    for item1 in cursor.fetchall():
        cat.append(item1[1])
    if (TEST):
        end = "_test"
    else:
        end = ""

    cursor.execute("SELECT distinct(uid) FROM features" + end + ";")
    uid_set = cursor.fetchall()
    count = 0
    for uid in uid_set: