def extractQuestFeatures(c, DIR):
    """Dump question-related feature tables under DIR/posts/.

    Writes accepted_answers.csv, post_text_data.csv (deduplicated by
    PostId, utf-8 encoded) and quest_stats.csv, all built from the
    database cursor/connection `c`.
    """
    out_dir = DIR + 'posts'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Accepted answer per question.
    frame = pd.DataFrame(query.acceptedAnswers(c),
                         columns=header['accepted_answers'])
    frame.to_csv(DIR + 'posts/accepted_answers.csv', index=False)

    # Raw post text; the query can yield the same post more than once,
    # so keep only the first row per PostId.
    frame = pd.DataFrame(query.postsText_Data(c),
                         columns=header['post_text_data'])
    frame = frame.drop_duplicates(['PostId'])
    frame.to_csv(DIR + 'posts/post_text_data.csv', encoding='utf-8',
                 index=False)

    # Aggregate per-question statistics.
    frame = pd.DataFrame(query.questStats(c),
                         columns=header['quest_stats'])
    frame.to_csv(DIR + 'posts/quest_stats.csv', index=False)
def extractAnsTime(c, DIR, MODE, VERSION):
    """Extract answer-time tables for the requested MODE.

    MODE selects one of 'FIRST', 'ACCEPTED' or 'UPVOTED'; when MODE is
    'UPVOTED' and VERSION is 'EXTENDED' the extended variant is written
    as well.  Output goes under the posts/ area.
    """
    if not os.path.exists(DIR + 'posts'):
        os.makedirs(DIR + 'posts')

    if MODE == 'FIRST':
        rows = query.firstAnsTime(c)
        util.save_to_csv(rows, header['ans_time_first'],
                         'posts/ans_time_first.csv')
    elif MODE == 'ACCEPTED':
        frame = pd.DataFrame(query.acceptAnsTime(c),
                             columns=header['ans_time_accepted'])
        frame.to_csv(DIR + 'posts/ans_time_accepted.csv', index=False)
    elif MODE == 'UPVOTED':
        # NOTE(review): unlike the FIRST branch these paths have no
        # 'posts/' prefix -- confirm util.save_to_csv's base directory
        # makes this intentional.
        rows = query.upvotedAnsTime(c)
        util.save_to_csv(rows, header['ans_time_upvoted'],
                         'ans_time_upvoted.csv')
        if VERSION == 'EXTENDED':
            rows = query.upvotedAnsTime1(c)
            util.save_to_csv(rows, header['ans_time_upvoted_ex'],
                             'ans_time_upvoted_ex.csv')
def extractUserFeatures(c, DIR):
    """Dump per-user feature tables under DIR/users/.

    Each table is produced by one query helper and written to a CSV
    named after its header key (users_stats is utf-8 encoded).
    """
    if not os.path.exists(DIR + 'users'):
        os.makedirs(DIR + 'users')

    # (query function, header key == csv stem, extra to_csv kwargs)
    jobs = [
        (query.questPerUser, 'quest_per_user', {}),
        (query.ansPerUser, 'answer_per_user', {}),
        (query.postsPerUser, 'posts_per_user', {}),
        (query.usersStats, 'users_stats', {'encoding': 'utf-8'}),
        (query.usersAvAnsTime, 'users_av_ans_time', {}),
    ]
    for fetch, name, extra in jobs:
        frame = pd.DataFrame(fetch(c), columns=header[name])
        frame.to_csv(DIR + 'users/' + name + '.csv', index=False, **extra)
def extractUserActivityFeatures(c, DIR):
    """Dump the per-user activity table and return the derived posts frame.

    Writes users_activity.csv under DIR/users/, builds the posts frame
    via useractivity.makePosts, and filters out rows belonging to
    deleted/anonymous users (UserId == -1).

    Returns the filtered posts DataFrame.
    """
    if not os.path.exists(DIR + 'users'):
        os.makedirs(DIR + 'users')
    users_activity = pd.DataFrame(query.usersActivity(c),
                                  columns=header['users_activity'])
    # BUG FIX: previously wrote to DIR + 'posts/users_activity.csv' while
    # only ensuring DIR + 'users' exists (would fail when posts/ is
    # absent); write into the directory this function creates.
    users_activity.to_csv(DIR + 'users/users_activity.csv', index=False)
    posts = useractivity.makePosts(users_activity)
    # Drop activity attributed to deleted/anonymous users.
    posts = posts[posts['UserId'] != -1]
    return posts
def main(argv):
    """Plot hourly crime counts: all crimes overlaid with one offense type."""
    setPlotDesign()

    # All crimes grouped by hour of first occurrence.
    rows = qry.queryDB(
        "select Date_format(First_Occurrence_Date, '%H') as 'Hour', Count(1) as 'Crime_Count' from DenverCrime where Is_Crime = 1 group by Date_format(First_Occurrence_Date, '%H');"
    )
    queryToPlot(rows, "#6DA2BE", 0, "All Crimes")

    # Same aggregation restricted to the selected offense code.
    rows = qry.queryDB(
        "select Date_format(First_Occurrence_Date, '%H') as 'Hour', Count(1) as 'Crime_Count' from DenverCrime where Is_Crime = 1 and Offense_Code = "
        + offense[0]
        + " group by Date_format(First_Occurrence_Date, '%H');")
    queryToPlot(rows, "#C16F65", .3, offense[1])

    plt.legend(loc="upper center", frameon=False, fontsize=12)
    plt.savefig("CrimeHourlyBarGraphs/AllCrimes-" + offense[2] + "-percent.png",
                bbox_inches="tight", dpi=300)
def main(argv):
    """Plot the average daily crime count against the daily max temperature."""
    cursor = qry.queryDB(
        "select TMax, Round(Sum(Crime_Count)/Count(1)) as CrimeCountAvg from DenverDailyCrimeWeather group by TMax"
    )
    temps = []
    averages = []
    for t_max, crime_avg in cursor:
        # Skip rows with no recorded temperature.
        if t_max is None:
            continue
        temps.append(float(t_max))
        averages.append(int(crime_avg))
    plot(temps, averages)
def main(argv):
    """Run k-means over geo-coded crime locations inside the bounding box."""
    sql = ("select Geo_Lon, Geo_Lat from DenverCrime where Geo_Lat > "
           + str(minGeoLat) + " and Geo_Lat < " + str(maxGeoLat)
           + " and Geo_Lon < " + str(maxGeoLon) + " and Geo_Lon > "
           + str(minGeoLon) + " and Offense_Code = " + crime[1])
    cursor = qry.queryDB(sql)
    # Keep only rows where both coordinates are present.
    locations = [Location(lon, lat)
                 for lon, lat in cursor
                 if lon is not None and lat is not None]
    kMeans(locations, k, iteration)
def constructMatrix(self, donations):
    """Build the padded regression matrix and its unpadded spectral variant.

    Each donation contributes two consecutive rows to `m` (past
    grant-year features, then current-year features padded with COVER
    placeholders); schools not appearing in `donations` contribute one
    padded row each.  Side effects: writes test.csv, data.mat, dict.json
    and dictReverse.json to the current working directory.

    Returns `spectralm`, the matrix without COVER padding.
    """
    fullDict = q.getAllRecent()        # school id -> most recent feature row
    donationDict = {}                  # mixed keys: row index -> school id AND school id -> row index(es)
    donationDictReverse = {}           # row index -> school id
    count = 0                          # next free row index in m
    checked = []                       # school ids already covered by a donation
    m = []                             # matrix with COVER padding
    spectralm = []                     # matrix without padding
    for donation in donations:
        # NOTE(review): pop() mutates the cached row inside self.data --
        # presumably drops a trailing column; confirm the mutation is
        # intended to persist across calls.
        self.data[donation[0]][1][0].pop()
        pastGrantYear = self.data[donation[0]][1][0] + donation[1:6]
        currentYear = list(fullDict[donation[0]]) + [COVER, COVER, COVER, COVER, COVER]
        m.append(pastGrantYear)
        m.append(currentYear)
        spectralm.append(self.data[donation[0]][1][0])
        spectralm.append(list(fullDict[donation[0]]))
        # Two rows per donor: `count` = past year, `count + 1` = current year.
        donationDict[count] = donation[0]
        donationDictReverse[count + 1] = donation[0]
        donationDict[donation[0]] = (count, count + 1)
        checked.append(donation[0])
        count = count + 2
    # Schools with no donation get a single COVER-padded row.
    for key, value in fullDict.items():
        if key not in set(checked):    # NOTE(review): set rebuilt every iteration
            m.append(list(value) + [COVER, COVER, COVER, COVER, COVER])
            spectralm.append(list(value))
            donationDict[key] = (count)
            donationDictReverse[count] = key
            count = count + 1
    # Persist the padded matrix and both index maps for later lookup.
    np.savetxt("test.csv", np.matrix(m), delimiter=",")
    sio.savemat('data.mat', {'a_dict': np.matrix(m)})
    with open('dict.json', 'w') as fp:
        json.dump(donationDict, fp)
    with open('dictReverse.json', 'w') as fp:
        json.dump(donationDictReverse, fp)
    return spectralm
def init():
    """Build and run the NLP-input Tk window.

    Connects to a local Neo4j instance first and exits with a message if
    the server is unreachable.  The window lets the user type a search,
    then shows the generated Cypher query and its result (filled in by
    setOutput via the two StringVars).
    """
    try:
        test = QueryDB.connection("bolt://localhost:7687", "neo4j", "test")
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        print("Cannot connect to server")
        sys.exit()

    page = Tk()
    cypher = StringVar()   # generated Cypher text, bound to lbl3
    output = StringVar()   # query result text, bound to lbl5
    page.title("NLP Input")
    page.geometry("700x350")
    app = Frame(page)
    app.grid()

    # setOutput translates the Entry's text and fills both StringVars.
    btn1 = Button(app, text="Submit",
                  command=lambda: setOutput(e1.get(), cypher, output, test))
    lbl1 = Label(app, text="Enter your search: ")
    lbl2 = Label(app, text="Query in Cypher: ")
    lbl3 = Label(app, textvariable=cypher)
    lbl4 = Label(app, text="Query result: ")
    lbl5 = Label(app, textvariable=output)
    e1 = Entry(app)

    # Grid layout: input on row 0, Cypher on row 2, result on row 3.
    lbl1.grid(row=0)
    lbl2.grid(row=2)
    e1.grid(row=0, column=1)
    lbl3.grid(row=2, column=1)
    btn1.grid(row=0, column=2, padx=10, pady=10)
    lbl4.grid(row=3, column=0)
    lbl5.grid(row=3, column=1)

    page.mainloop()
# NOTE(review): this chunk starts mid-method -- the `def` owning the first
# loop below (apparently the tail of performRegression) is outside the
# visible source.  Python 2 syntax (`print x`) is used further down.
        for index, r in enumerate(computed_mat):
            if index < 76:
                # Odd-indexed rows are decoded element-wise with decFunc;
                # even-indexed rows pass through unchanged.
                if index % 2 != 0:
                    translated_mat.append((fd.findRow(str(index)), map(decFunc, r)))
                else:
                    translated_mat.append((fd.findRow(str(index)), r))
        # Sort by judgement score, then pair each label with its score.
        ranking = sorted(translated_mat, key=lambda s: judgement(s[1]))
        ranking = map(lambda x: (x[0], judgement(x[1])), ranking)
        # Python 2: map returns a list, so slicing works; reversed = best first.
        return ranking[::-1]

    def spectralClustering(self):
        """Experimental: run spectral clustering on synthetic circles data."""
        S = s.SpectralClustering(n_clusters=2, gamma=1.0,
                                 affinity='rbf', n_neighbors=10,
                                 assign_labels='kmeans')
        mat = self.performRegression()
        mat = np.asmatrix(mat)
        #print np.size(mat[:, [35, 36]])
        # NOTE(review): the regression matrix is computed but unused below;
        # the fit runs on make_circles output instead -- confirm intent.
        X, test = d.make_circles(10000)
        print np.size(X)
        S.fit(X)
        print S.fit_predict(X)


if __name__ == "__main__":
    R = Regression(q.getDonatedSchools())
    # R.rankWinners(lambda x: x[4])
    R.performRegression()
    #R.spectralClustering()
import tornado.web
import tornado.ioloop
from pymongo import MongoClient
import PostServer
import QueryDB

# Connects to the default host and port on mongodb.
client = MongoClient()
db = client.db
query_db = QueryDB.QueryHandler(db)

# Maps the root URL to MainHandler and passes the db reference to it.
app = tornado.web.Application([(r"/", PostServer.MainHandler, dict(db=db))])
loop = tornado.ioloop.IOLoop.instance()
# NOTE(review): the loop is obtained but never started here and `app` never
# listens on a port -- presumably app.listen(...)/loop.start() follow in a
# part of the file not shown; verify they exist.
# MySQL connection settings (credentials redacted / blank here).
db_host = '127.0.0.1'
db_user = '******'
db_pw = ''
db_name = 'infouser'

# Dialogflow wiring, currently disabled.
# dlf_project_id = 'fir-thesis-v1'
# dlf_session_id = 'a409931f1c2ea9842c238469417ae2ac490b36bd'
# mgDLF = mgdia.manageDLF(projectID=dlf_project_id,
#                         sessionID=dlf_session_id)

conDB = conn.connectDB(host=db_host, username=db_user, password=db_pw,
                       database=db_name)
qy = qr.queryDB(conDB.getConnection())

# Seed list of symptom fact names (Thai) to insert into fact_data.
dis_data = [
    'มีอารมณ์ซึมเศร้า', 'มีอารมณ์หงุดหงิด', 'มีอารมณ์ก้าวร้าว',
    'ขาดความสนใจสิ่งรอบข้าง', 'ไม่ค่อยมีสมาธิเวลาทำสิ่งต่างๆ',
    'รู้สึกอ่อนเพลีย', 'ทำอะไรก็เชื่องช้า', '่รับประทานอาหารมากขึ้น',
    'รับประทานน้อยลง', 'นอนมากขึ้น', 'นอนน้อยลง',
    'ตำหนิตัวเองเป็นอันดับแรกถ้ามีอะไรพลาด', 'พยายามฆ่าตัวตาย'
]
print(dis_data)
for dis in dis_data:
    # NOTE(review): the INSERT statement is built but never executed -- no
    # qy.<execute>(query) call follows; confirm whether execution was
    # dropped or happens in code not shown.
    # NOTE(review): the value is interpolated with str.format rather than a
    # parameterized query; fine for this fixed list, unsafe for user input.
    query = 'INSERT INTO fact_data(fact_name) VALUES (\'{}\')'.format(dis)
import parse
import QueryDB

# Smoke-test harness: translate natural-language queries to Cypher, run
# them against a local Neo4j instance, and diff against expected answers.
myConnection = QueryDB.connection("bolt://localhost:7687", "neo4j", "test")

# Translate each test query into Cypher.
# BUG FIX: the original called `infile.close` (no parentheses, a no-op
# attribute access), so the files were never closed; `with` closes them
# deterministically.
translations = []
with open("testQueries.txt", "r") as infile:
    for line in infile:
        translations.append(parse.init(line.strip()))

# Run every translated query against the database.
results = [myConnection.run_Return_Query(t).strip() for t in translations]

# Load the expected answers, one per line.
with open("testResults.txt", "r") as infile:
    answers = [line.strip() for line in infile]

# Compare actual vs. expected, reporting each mismatch.
for i in range(len(answers)):
    if answers[i] == results[i]:
        print("Querie ", i, " successful")
    else:
        print("Querie ", i, " failed!")
        print("\tExpected:\t", answers[i])
        print("\tActual:\t", results[i])