Example 1
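# Creates DIR/posts if needed, then extracts question-level features
# (accepted answers, deduplicated post text, question statistics) and
# writes each set to its own CSV.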
def extractQuestFeatures(c, DIR):
    if not os.path.exists(DIR + 'posts'):
        os.makedirs(DIR + 'posts')

    accepted_answers = pd.DataFrame(query.acceptedAnswers(c), columns=header['accepted_answers'])
    accepted_answers.to_csv(DIR + 'posts/accepted_answers.csv', index=False)

    post_text_data = pd.DataFrame(query.postsText_Data(c), columns=header['post_text_data'])
    post_text_data = post_text_data.drop_duplicates(['PostId'])
    post_text_data.to_csv(DIR + 'posts/post_text_data.csv', encoding='utf-8', index=False)

    quest_stats = pd.DataFrame(query.questStats(c), columns=header['quest_stats'])
    quest_stats.to_csv(DIR + 'posts/quest_stats.csv', index=False)
Example 2
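# Extracts time-to-answer features. MODE selects which answers are timed
# (first, accepted, or upvoted); VERSION == 'EXTENDED' adds a second
# upvoted-answer dataset.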
def extractAnsTime(c, DIR, MODE, VERSION):
    if not os.path.exists(DIR + 'posts'):
        os.makedirs(DIR + 'posts')
    if MODE == 'FIRST':
        ans_time_first = query.firstAnsTime(c)
        util.save_to_csv(ans_time_first, header['ans_time_first'], 'posts/ans_time_first.csv')
    elif MODE == 'ACCEPTED':
        ans_time_accepted = pd.DataFrame(query.acceptAnsTime(c), columns=header['ans_time_accepted'])
        ans_time_accepted.to_csv(DIR + 'posts/ans_time_accepted.csv', index=False)
    elif MODE == 'UPVOTED':
        ans_time_upvoted = query.upvotedAnsTime(c)
        # 'posts/' prefix added to match the FIRST branch; the original passed a
        # bare filename, which looks like an oversight (util.save_to_csv's path
        # handling is not shown here).
        util.save_to_csv(ans_time_upvoted, header['ans_time_upvoted'], 'posts/ans_time_upvoted.csv')
    if VERSION == 'EXTENDED':
        ans_time_upvoted_ex = query.upvotedAnsTime1(c)
        util.save_to_csv(ans_time_upvoted_ex, header['ans_time_upvoted_ex'], 'posts/ans_time_upvoted_ex.csv')
Example 3
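# Extracts per-user features (questions, answers, posts, overall stats,
# and average answer time) and writes each set to CSV under DIR/users.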
def extractUserFeatures(c, DIR):
    if not os.path.exists(DIR + 'users'):
        os.makedirs(DIR + 'users')
    quest_per_user = pd.DataFrame(query.questPerUser(c), columns=header['quest_per_user'])
    quest_per_user.to_csv(DIR + 'users/quest_per_user.csv', index=False)

    answer_per_user = pd.DataFrame(query.ansPerUser(c), columns=header['answer_per_user'])
    answer_per_user.to_csv(DIR + 'users/answer_per_user.csv', index=False)

    posts_per_user = pd.DataFrame(query.postsPerUser(c), columns=header['posts_per_user'])
    posts_per_user.to_csv(DIR + 'users/posts_per_user.csv', index=False)

    users_stats = pd.DataFrame(query.usersStats(c), columns=header['users_stats'])
    users_stats.to_csv(DIR + 'users/users_stats.csv', encoding='utf-8', index=False)

    users_av_ans_time = pd.DataFrame(query.usersAvAnsTime(c), columns=header['users_av_ans_time'])
    users_av_ans_time.to_csv(DIR + 'users/users_av_ans_time.csv', index=False)
Example 4
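# Extracts raw user-activity rows, saves them to CSV, and returns a posts
# frame with anonymous users (UserId == -1) filtered out.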
def extractUserActivityFeatures(c, DIR):
    if not os.path.exists(DIR + 'users'):
        os.makedirs(DIR + 'users')
    users_activity = pd.DataFrame(query.usersActivity(c), columns=header['users_activity'])
    # Written under users/ to match the directory created above; the original
    # wrote to posts/, which this function never creates.
    users_activity.to_csv(DIR + 'users/users_activity.csv', index=False)

    posts = useractivity.makePosts(users_activity)
    posts = posts[posts['UserId'] != -1]
    return posts
Example 5
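# Plots hourly crime counts for all crimes and for one offense type, then
# saves the overlaid bar chart as a high-resolution PNG.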
def main(argv):
    setPlotDesign()
    data = qry.queryDB(
        "select Date_format(First_Occurrence_Date, '%H') as 'Hour', Count(1) as 'Crime_Count' from DenverCrime where Is_Crime = 1 group by Date_format(First_Occurrence_Date, '%H');"
    )
    queryToPlot(data, "#6DA2BE", 0, "All Crimes")
    data = qry.queryDB(
        "select Date_format(First_Occurrence_Date, '%H') as 'Hour', Count(1) as 'Crime_Count' from DenverCrime where Is_Crime = 1 and Offense_Code = "
        + offense[0] + " group by Date_format(First_Occurrence_Date, '%H');")
    #plt.subplot(112)
    queryToPlot(data, "#C16F65", 0.3, offense[1])

    plt.legend(loc="upper center", frameon=False, fontsize=12)
    #plt.show()
    plt.savefig("CrimeHourlyBarGraphs/AllCrimes-" + offense[2] +
                "-percent.png",
                bbox_inches="tight",
                dpi=300)
Example 6
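# Plots the average daily crime count as a function of daily maximum
# temperature (TMax), skipping rows with missing temperatures.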
def main(argv):
    cursor = qry.queryDB(
        "select TMax, Round(Sum(Crime_Count)/Count(1)) as CrimeCountAvg from DenverDailyCrimeWeather group by TMax"
    )
    tMaxList = []
    crimeCountAvgList = []
    for (tMax, crimeCountAvg) in cursor:
        if tMax is not None:
            tMaxList.append(float(tMax))
            crimeCountAvgList.append(int(crimeCountAvg))

    plot(tMaxList, crimeCountAvgList)
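Example 7
# Queries crime coordinates inside a configured bounding box for one offense
# type and clusters the locations with k-means.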
def main(argv):
    cursor = qry.queryDB(
        "select Geo_Lon, Geo_Lat from DenverCrime where Geo_Lat > " +
        str(minGeoLat) + " and Geo_Lat < " + str(maxGeoLat) +
        " and Geo_Lon < " + str(maxGeoLon) + " and Geo_Lon > " +
        str(minGeoLon) + " and Offense_Code = " + crime[1])
    locations = []
    for (geoLon, geoLat) in cursor:
        if geoLon is not None and geoLat is not None:
            locations.append(Location(geoLon, geoLat))

    #downloadMap()

    kMeans(locations, k, iteration)
Example 8
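    # Builds the matrix fed to regression/clustering: one past-grant row and one
    # COVER-padded current-year row per donation, plus COVER-padded rows for
    # schools without donations. Row-index <-> school mappings are saved to JSON;
    # the unpadded rows are returned for spectral clustering.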
    def constructMatrix(self, donations):
        fullDict = q.getAllRecent()
        donationDict = {}
        donationDictReverse = {}
        count = 0
        checked = set()
        m = []
        spectralm = []

        for donation in donations:
            self.data[donation[0]][1][0].pop()
            pastGrantYear = self.data[donation[0]][1][0] + donation[1:6]
            currentYear = list(fullDict[donation[0]]) + [COVER] * 5

            m.append(pastGrantYear)
            m.append(currentYear)
            spectralm.append(self.data[donation[0]][1][0])
            spectralm.append(list(fullDict[donation[0]]))

            donationDict[count] = donation[0]
            donationDictReverse[count + 1] = donation[0]
            donationDict[donation[0]] = (count, count + 1)
            checked.add(donation[0])
            count += 2

        for key, value in fullDict.items():
            if key not in checked:
                m.append(list(value) + [COVER] * 5)
                spectralm.append(list(value))
                donationDict[key] = count
                donationDictReverse[count] = key
                count += 1

        np.savetxt("test.csv", np.matrix(m), delimiter=",")
        sio.savemat('data.mat', {'a_dict': np.matrix(m)})
        with open('dict.json', 'w') as fp:
            json.dump(donationDict, fp)
        with open('dictReverse.json', 'w') as fp:
            json.dump(donationDictReverse, fp)

        return spectralm
Example 9
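# Builds a small Tkinter window: the user enters a natural-language search,
# and the app shows the generated Cypher query and the Neo4j result.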
def init():
    try:
        test = QueryDB.connection("bolt://localhost:7687", "neo4j", "test")
    except Exception:
        print("Cannot connect to server")
        sys.exit()

    page = Tk()

    cypher = StringVar()
    output = StringVar()

    page.title("NLP Input")
    page.geometry("700x350")

    app = Frame(page)
    app.grid()

    btn1 = Button(app,
                  text="Submit",
                  command=lambda: setOutput(e1.get(), cypher, output, test))

    lbl1 = Label(app, text="Enter your search: ")
    lbl2 = Label(app, text="Query in Cypher: ")
    lbl3 = Label(app, textvariable=cypher)
    lbl4 = Label(app, text="Query result: ")
    lbl5 = Label(app, textvariable=output)

    e1 = Entry(app)

    lbl1.grid(row=0)
    lbl2.grid(row=2)

    e1.grid(row=0, column=1)
    lbl3.grid(row=2, column=1)

    btn1.grid(row=0, column=2, padx=10, pady=10)

    lbl4.grid(row=3, column=0)
    lbl5.grid(row=3, column=1)

    page.mainloop()
Example 10
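        # Fragment of a ranking method: rows are mapped back to labels via
        # fd.findRow (odd-indexed rows among the first 76 are decoded with
        # decFunc), then sorted by judgement score, highest first.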
        for index, r in enumerate(computed_mat):
            if index < 76:
                if index % 2 != 0:
                    translated_mat.append((fd.findRow(str(index)),
                                           list(map(decFunc, r))))
            else:
                translated_mat.append((fd.findRow(str(index)), r))

        ranking = sorted(translated_mat, key=lambda s: judgement(s[1]))
        # list comprehension replaces the Python 2 map() call so the result
        # can be reverse-sliced below (map objects cannot be sliced in Python 3)
        ranking = [(x[0], judgement(x[1])) for x in ranking]
        return ranking[::-1]

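    # Demo run: fits scikit-learn's SpectralClustering on synthetic circles
    # data; the regression matrix is computed but not clustered here.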
    def spectralClustering(self):
        S = s.SpectralClustering(n_clusters=2, gamma=1.0,
                                 affinity='rbf', n_neighbors=10,
                                 assign_labels='kmeans')

        mat = self.performRegression()
        mat = np.asmatrix(mat)
        # print(np.size(mat[:, [35, 36]]))

        X, test = d.make_circles(10000)
        print(np.size(X))
        S.fit(X)
        print(S.fit_predict(X))

if __name__ == "__main__":
    R = Regression(q.getDonatedSchools())
    # R.rankWinners(lambda x: x[4])
    R.performRegression()
    #R.spectralClustering()
Example 11
import tornado.web
import tornado.ioloop
from pymongo import MongoClient

import PostServer
import QueryDB

# Connects to MongoDB on the default host and port
client = MongoClient()
db = client.db
query_db = QueryDB.QueryHandler(db)

# Maps root URL to MainHandler
# and passes db reference
app = tornado.web.Application([(r"/", PostServer.MainHandler, dict(db=db))])

loop = tornado.ioloop.IOLoop.instance()
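# Note: as written, nothing is served; a typical Tornado setup would also
# call app.listen(<port>) before loop.start().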
Example 12
db_host = '127.0.0.1'
db_user = '******'
db_pw = ''
db_name = 'infouser'

# dlf_project_id = 'fir-thesis-v1'
# dlf_session_id = 'a409931f1c2ea9842c238469417ae2ac490b36bd'

# mgDLF = mgdia.manageDLF(projectID=dlf_project_id,
#                         sessionID=dlf_session_id)

conDB = conn.connectDB(host=db_host,
                       username=db_user,
                       password=db_pw,
                       database=db_name)

qy = qr.queryDB(conDB.getConnection())

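# Depression-screening symptom statements, in Thai (roughly: depressed mood,
# irritability, aggression, loss of interest, poor concentration, fatigue,
# sluggishness, eating more/less, sleeping more/less, self-blame, attempted
# suicide), inserted below as fact rows.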
dis_data = [
    'มีอารมณ์ซึมเศร้า', 'มีอารมณ์หงุดหงิด', 'มีอารมณ์ก้าวร้าว',
    'ขาดความสนใจสิ่งรอบข้าง', 'ไม่ค่อยมีสมาธิเวลาทำสิ่งต่างๆ',
    'รู้สึกอ่อนเพลีย', 'ทำอะไรก็เชื่องช้า', 'รับประทานอาหารมากขึ้น',
    'รับประทานน้อยลง', 'นอนมากขึ้น', 'นอนน้อยลง',
    'ตำหนิตัวเองเป็นอันดับแรกถ้ามีอะไรพลาด', 'พยายามฆ่าตัวตาย'
]

print(dis_data)

for dis in dis_data:
    query = 'INSERT INTO fact_data(fact_name) VALUES (\'{}\')'.format(dis)
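    # NOTE: the original snippet only builds the statement; executing it
    # requires a call on qy or the underlying connection (method name depends
    # on queryDB's API, so none is assumed here). Parameterized queries would
    # also be safer than string formatting.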
Example 13
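# Regression test: translates each line of testQueries.txt to Cypher, runs it
# against Neo4j, and compares the results with the expected lines in
# testResults.txt.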
import parse
import QueryDB

myConnection = QueryDB.connection("bolt://localhost:7687", "neo4j", "test")

infile = open("testQueries.txt", "r")
translations = []
results = []
for line in infile:
    translations.append(parse.init(line.strip()))
for line in translations:
    results.append(myConnection.run_Return_Query(line).strip())
infile.close()

infile = open("testResults.txt", "r")
answers = []
for line in infile:
    answers.append(line.strip())
for i in range(len(answers)):
    if answers[i] == results[i]:
        print("Query ", i, " successful")
    else:
        print("Query ", i, " failed!")
        print("\tExpected:\t", answers[i])
        print("\tActual:\t", results[i])
infile.close()