# Convert the per-class word occurrence counts in the dictionary table into
# conditional probabilities, dividing by each class's word total from
# meta_class. Assumes the mysql-connector-python and progress packages;
# read_db_config() is a project-local helper returning the connection settings.
from mysql.connector import MySQLConnection, Error
from progress.spinner import PieSpinner


def count_probs():
    try:
        dbconfig = read_db_config()
        conn = MySQLConnection(**dbconfig)
        cursor = conn.cursor(buffered=True)

        cursor.execute("SELECT * FROM dictionary")
        results = cursor.fetchall()

        # meta_class holds one row per emotion class with its total word count.
        cursor.execute("SELECT * FROM meta_class")
        classes = cursor.fetchall()
        joy = float(classes[0][2])
        fear = float(classes[1][2])
        anger = float(classes[2][2])
        sad = float(classes[3][2])
        disgust = float(classes[4][2])
        shame = float(classes[5][2])

        spinner = PieSpinner('\nCounting Probabilities :: ')
        for res in results:
            # print(res[1])
            spinner.next()
            total_occurences = res[2] + res[3] + res[4] + res[5] + res[6] + res[7]
            # if total_occurences < 10:
            #     print(res)
            joy_probs = res[2] / joy
            fear_probs = res[3] / fear
            anger_probs = res[4] / anger
            sad_probs = res[5] / sad
            disgust_probs = res[6] / disgust
            shame_probs = res[7] / shame
            cursor.execute(
                "UPDATE dictionary SET joy_probs=%(joy)s, fear_probs=%(fear)s, "
                "anger_probs=%(anger)s, sadness_probs=%(sad)s, "
                "disgust_probs=%(disgust)s, shame_probs=%(shame)s "
                "WHERE id=%(target)s",
                {'joy': joy_probs, 'fear': fear_probs, 'anger': anger_probs,
                 'sad': sad_probs, 'disgust': disgust_probs,
                 'shame': shame_probs, 'target': res[0]})
        # classes = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt']
    except Error as e:
        print(e)
    finally:
        conn.commit()
        cursor.close()
        conn.close()
        spinner.finish()
        # return 1


# if __name__ == '__main__':
#     count_probs()
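# The database functions in this file call read_db_config() without defining
# it. Below is a minimal sketch of what such a helper could look like; the
# config.ini filename and the [mysql] section name are assumptions, not taken
# from the original code.
from configparser import ConfigParser


def read_db_config(filename='config.ini', section='mysql'):
    """Read database connection settings and return them as a dict."""
    parser = ConfigParser()
    parser.read(filename)
    if not parser.has_section(section):
        raise Exception('Section {} not found in {}'.format(section, filename))
    return dict(parser.items(section))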
# (Python 2 Slack clean-up script; this excerpt begins mid-loop, right after an
# API error message is printed.)
                  str(response['error']))
            continue

    spinner = PieSpinner()
    if (fileCount > 0):
        print "Parsing " + str(fileCount) + " files..."
        spinner.next()
    else:
        print "No files older than 10 days found. \r",

    spinning = True
    while spinning:
        if len(response["files"]) == 0:
            spinner.finish()
            spinning = False
            break
        elif (whileCount >= fileCount and whileCount > 1):
            spinner.finish()
            spinning = False
            print "We couldn't delete some files posted by other users on private conversations."
            break
        else:
            iteratorCounter = 0
            spinner.next()
            for f in response["files"]:
                iteratorCounter += 1
                spinner.next()
                # get user info for this file
                userInfoUrl = "https://slack.com/api/users.info"
# Rebuild the dictionary table by counting, for every word, how often it
# appears in each emotion class. The (start, end) id range is held out from
# training; 7433 is the last id in data3. Uses the same imports and
# read_db_config() helper as count_probs() above.
def count_word_occurences(start, end):
    try:
        dbconfig = read_db_config()
        conn = MySQLConnection(**dbconfig)
        cursor = conn.cursor(buffered=True)
        cursor.execute("TRUNCATE dictionary")

        classes = [1, 2, 3, 4, 5, 6]
        # Map each class label to its occurrence column in the dictionary table.
        occurrence_column = {
            '1': 'joy_occurences',
            '2': 'fear_occurences',
            '3': 'anger_occurences',
            '4': 'sadness_occurences',
            '5': 'disgust_occurences',
            '6': 'shame_occurences',
        }

        spinner = PieSpinner("\nCounting Word Occurences ")
        for target in classes:
            # Select the training sentences for this class, skipping the
            # held-out (start, end) id range.
            if start == 1:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and id > %(id_target)s",
                    {'target': target, 'id_target': end})
            elif end == 7433:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and id < %(id_target)s",
                    {'target': target, 'id_target': start})
            else:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and (id < %(id_start)s or id > %(id_end)s)",
                    {'target': target, 'id_start': start, 'id_end': end})
            class_documents = cursor.fetchall()

            for row in class_documents:
                column = occurrence_column.get(row[0])
                if column is None:
                    continue  # unexpected class label
                for word in row[1].split(' '):
                    spinner.next()
                    if len(word) == 0:
                        continue
                    # Column names cannot be bound as query parameters, so they
                    # are interpolated from the fixed mapping above.
                    cursor.execute(
                        "SELECT word, {} FROM dictionary WHERE word=%(target)s".format(column),
                        {'target': word})
                    check_word = cursor.fetchone()
                    if check_word is not None:
                        # Word already in the dictionary: bump its count.
                        cursor.execute(
                            "UPDATE dictionary SET {}=%(number)s WHERE word=%(target)s".format(column),
                            {'number': check_word[1] + 1, 'target': word})
                    else:
                        # First time this word has been seen for any class.
                        cursor.execute(
                            "INSERT INTO dictionary(word, {}) VALUES(%(target)s, 1)".format(column),
                            {'target': word})
    except Error as e:
        print(e)
    finally:
        conn.commit()
        cursor.close()
        conn.close()
        spinner.finish()


# if __name__ == '__main__':
#     count_word_occurences(5, 10)
# Count the distinct words used in each emotion class and store the totals in
# meta_class (used later by count_probs). Same hold-out convention, imports and
# helpers as count_word_occurences above.
def count_class_word(start, end):
    try:
        dbconfig = read_db_config()
        conn = MySQLConnection(**dbconfig)
        cursor = conn.cursor(buffered=True)
        cursor.execute("TRUNCATE meta_class")

        # classes = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame']
        classes = [1, 2, 3, 4, 5, 6]
        i = 1
        class_count = [0, 0, 0, 0, 0, 0, 0]
        spinner = PieSpinner('\nCounting Class Word :: ')
        for target in classes:
            text_list = []
            # cursor.execute("SELECT class,SIT FROM data3 WHERE class=%(mytarget)s and id < 7001", {'mytarget': target})
            if start == 1:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and id > %(id_target)s",
                    {'target': target, 'id_target': end})
            elif end == 7433:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and id < %(id_target)s",
                    {'target': target, 'id_target': start})
            else:
                cursor.execute(
                    "SELECT class, sentence FROM data3 WHERE class=%(target)s and (id < %(id_start)s or id > %(id_end)s)",
                    {'target': target, 'id_start': start, 'id_end': end})
            result = cursor.fetchall()

            # Collect the distinct words that occur in this class.
            for res in result:
                words = res[1].split(' ')
                for word in words:
                    if word not in text_list and len(word) > 0:
                        # print(word)
                        text_list.append(word)
                        spinner.next()

            class_count[i] = len(text_list)
            cursor.execute(
                "INSERT INTO meta_class values(%(id)s, %(class)s, %(word)s)",
                {'id': i, 'class': classes[i - 1], 'word': len(text_list)})
            i += 1
        spinner.finish()
        # print(class_count)
        # break
    except Error as e:
        print(e)
    finally:
        conn.commit()
        cursor.close()
        conn.close()
        return 1


# if __name__ == '__main__':
#     process_words()
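# How the naive Bayes helpers fit together is not shown in these snippets. The
# sketch below is one plausible driver, assuming the convention the functions
# share: (start, end) is an id range in data3 (ids 1..7433) excluded from
# training and used for testing by test_naive_bayes further below. The fold
# boundary in the example call is an assumption, not taken from the original.
def run_holdout(start, end):
    count_class_word(start, end)       # rebuild meta_class word totals per class
    count_word_occurences(start, end)  # rebuild per-word occurrence counts
    count_probs()                      # turn counts into conditional probabilities
    test_naive_bayes(start, end)       # evaluate on the held-out id range


# Example: hold out roughly the last tenth of the data.
# run_holdout(6691, 7433)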
# Logs into Wilma (a Finnish school administration system), scrapes the course
# selection pages, and picks the requested courses as quickly as possible. The
# user-facing prompts are in Finnish. clearScreen(), select() and the
# module-level wilma_url are defined elsewhere in the script; the imports below
# cover the libraries this function visibly uses.
import datetime
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from getpass import getpass
import tkinter as tk

import pause
import requests
from bs4 import BeautifulSoup as bs
from consolemenu import SelectionMenu
from progress.bar import ShadyBar
from progress.spinner import Spinner
from termcolor import colored


def magic():
    global wilma_url
    clearScreen()
    custom_url = input(
        f"Käytetään wilma-osoitetta \"{wilma_url}\".\nPaina Enter jos tämä kelpaa. Jos ei kelpaa, kirjoita oma: "
    )
    if custom_url.strip() != "":
        wilma_url = custom_url
    clearScreen()

    print(colored("Sinun on kirjauduttava Wilmaan!", "yellow"))
    username = input("Käyttäjätunnuksesi: ").strip()
    password = getpass(prompt="Salasanasi: ")
    clearScreen()

    with requests.Session() as r:
        # Log in: fetch the session token from the login page, then post the
        # credentials together with it.
        spinner = Spinner("Kirjaudutaan... ")
        g = r.get(wilma_url)
        spinner.next()
        soup = bs(g.text, "html.parser")
        spinner.next()
        token = soup.select_one("input[name=SESSIONID]").get("value")
        spinner.next()
        data = {
            "Login": username,
            "Password": password,
            "SESSIONID": token,
        }
        p = r.post(f"{wilma_url}/login", data=data)
        if "loginfailed" in p.url:
            print(colored("\nKirjautuminen epäonnistui", "red"))
            exit()
        spinner.next()
        spinner.finish()
        print(colored("Kirjautuminen onnistui!\n", "green"))

        try:
            # The formkey token is required by the course selection requests.
            spinner = Spinner("Hankitaan API-tokeni... ")
            g = r.get(wilma_url)
            spinner.next()
            soup = bs(g.text, "html.parser")
            spinner.next()
            token = soup.select_one("input[name=formkey]").get("value")
            spinner.next()
            spinner.finish()
            print(colored("Tokeni löytyi!\n", "green"))
        except:
            print(colored("Tokenin haku mokas.", "red"))
            exit()

        # List the school years and their course selection periods.
        g = r.get(f"{wilma_url}/selection/view")
        soup = bs(g.text, "html.parser")
        parent = soup.select_one("#own-schools")
        years = []
        for index, child in enumerate(parent.find_all("h4")):
            years.append({
                "title": child.text.strip(),
                "periods": [
                    link["href"]
                    for link in parent.find_all("ul")[index].find_all("a")
                ]
            })
        selection_menu = SelectionMenu(
            [year["title"] for year in years],
            "Lukuvuosi",
            "Valitse oikea lukuvuosi, josta löytyy haluamasi kurssit.",
            show_exit_option=False)
        selection_menu.show()
        periods = years[selection_menu.selected_option]["periods"]

        # Ask for the wanted course names in a small Tkinter text box.
        master = tk.Tk()
        master.resizable(False, False)
        master.title('Haluamasi kurssit')
        master.eval('tk::PlaceWindow . center')

        def getInput():
            globals()["courses_input"] = textarea.get("1.0", "end-1c")
            master.after(1, master.destroy)

        title = tk.Label(
            master,
            text="Liitä tähän kaikki haluamasi kurssit.\nVoit erottaa ne miten tahansa (pilkut, rivivälit, jne.)"
        )
        title.grid(row=0, column=0)
        textarea = tk.Text(master, height=30, width=38)
        textarea.grid(row=1, column=0)
        btn = tk.Button(master, text="Done.", justify="center", command=getInput)
        btn.grid(row=2, column=0)
        master.mainloop()

        # Pull course codes out of the pasted text, whatever separators were used.
        course_regex = r"([A-Za-z0-9öÖäÄåÅ]+[\.0-9]+)"
        courses = [
            course.group(0)
            for course in re.finditer(course_regex, globals()["courses_input"],
                                      re.MULTILINE)
        ]
        courses = [{
            "name": course,
            "id": "",
            "selected": False
        } for course in courses]
        print(colored(f"{len(courses)} kurssin nimeä tunnistettu", "green"))

        # Resolve each course name to its selection link on the period pages.
        bar = ShadyBar("Etsitään kurssit",
                       max=(len(courses) * len(periods)),
                       suffix="%(percent)d%%")
        for period in periods:
            g = r.get(f"{wilma_url}/selection/{period}")
            soup = bs(g.text, "html.parser")
            for course in courses:
                try:
                    id = soup.find("a", string=course["name"])["href"]
                    course["id"] = id
                except:
                    pass
                finally:
                    bar.next()
        failed = list(filter(lambda course: course["id"] == "", courses))
        success = list(filter(lambda course: course["id"] != "", courses))
        bar.finish()

        if len(failed) != 0:
            print(colored("Nämä kurssit eivät löytyneet:", "red"))
            for fail in failed:
                print(fail["name"])
            cont = input(
                "\nJatketaanko silti?\nPaina Enter jatkakseen ja jotain muuta lopetakseen: "
            )
            if cont != "":
                print(colored("\nOhjelma suljetaan.", "red"))
                exit()
        else:
            print(colored(f"Kaikki {len(success)} kurssia löydetty!\n", "green"))

        # Optionally wait until the moment the course selection opens.
        thetime = input(
            "\nMihin aikaan kurssivalinnat alkavat?\nJos haluat, että kurssit valitaan heti, paina Enter.\nMuuten, kirjoita muodossa \"16.00\": "
        )
        if thetime.strip() != "":
            (hours, minutes) = [
                int(t) for t in thetime.strip().replace(".", ":").split(":")
            ]
            fire = datetime.datetime.now().replace(hour=hours,
                                                   minute=minutes,
                                                   second=1)
            print(colored(f"Nyt odotetaan {thetime} asti...\n", "green"))
            pause.until(fire)
        else:
            print(colored("Aloitetaan heti!\n", "green"))
            time.sleep(0.5)

        clearScreen()
        start = time.time()
        # Fire the selection requests concurrently to grab the courses quickly.
        bar = ShadyBar("Valitaan kurssit", max=(len(success)))
        with ThreadPoolExecutor(max_workers=30) as ex:
            futures = [
                ex.submit(
                    select, r, {
                        "message": "pick-group",
                        "target": course["id"],
                        "formkey": token,
                    }) for course in success
            ]
            for fut in as_completed(futures):
                fut.result()
                bar.next()
        bar.finish()
        print(
            colored(
                "Kaikki kurssit valittu {0:0.1f} sekunnissa.".format(
                    time.time() - start), "green"))
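# clearScreen() is called above but not defined in this snippet. A minimal
# cross-platform sketch of what it might look like (the name comes from the
# call sites; the implementation is an assumption):
import os


def clearScreen():
    # 'cls' on Windows, 'clear' elsewhere
    os.system('cls' if os.name == 'nt' else 'clear')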
# K-means clustering of documents over Elasticsearch term vectors, with a
# PieSpinner showing progress. Vector, TermVectorAPI, list_files and the
# CLUSTER_* / INDEX_NAME / DOCUMENT_TYPE / ELASTIC_URL constants are assumed to
# come from the surrounding project.
import json
import os
import random
from math import log2
from operator import itemgetter

from progress.spinner import PieSpinner


class K_means:

    def __init__(self):
        # CLUSTER_NUM = k
        self.centroidList = []
        self.docCluster = {}
        self.oldDocCluster = {}
        self.docVector = {}
        self.docsJson = {}
        self.progress_bar = PieSpinner('Clustering')

    def initCentroid(self, k):
        # Pick k distinct documents at random as the initial centroids.
        self.centroidList = []
        visitedDoc = []
        while len(self.centroidList) < k:
            r = random.choice(list(self.docVector.keys()))
            if r not in visitedDoc:
                visitedDoc.append(r)
                v = Vector(self.docVector[r].dict.copy())
                # v.dict = self.docVector[r].dict.copy()
                self.centroidList.append(v)

    def nearestCentroid(self, docID):
        similarities = [u.sim(self.docVector[docID]) for u in self.centroidList]
        return max(enumerate(similarities), key=itemgetter(1))[0]

    def updateCentroid(self):
        # Recompute each centroid as the mean of the vectors assigned to it.
        k = len(self.centroidList)
        self.centroidList.clear()
        numberOfDoc = []
        for i in range(0, k):
            self.centroidList.append(Vector())
            numberOfDoc.append(0)
        for d in self.docVector.keys():
            c = self.docCluster[d]
            numberOfDoc[c] += 1
            for t in self.docVector[d].dict.keys():
                if t in self.centroidList[c].dict.keys():
                    self.centroidList[c].dict[t] += self.docVector[d].dict[t]
                else:
                    self.centroidList[c].dict[t] = self.docVector[d].dict[t]
        for c in range(0, len(self.centroidList)):
            if numberOfDoc[c] == 0:
                continue
            for t in self.centroidList[c].dict:
                self.centroidList[c].dict[t] /= numberOfDoc[c]

    def J(self):
        # Sum of squared distances of every document to its centroid.
        j = 0
        for d in self.docCluster.keys():
            j += self.docVector[d].distance2(self.centroidList[self.docCluster[d]])
        return j

    def terminateCondition(self):
        # Stop once no document changed cluster between two iterations.
        if len(self.oldDocCluster) == 0:
            return False
        for id in self.docCluster:
            if self.docCluster[id] != self.oldDocCluster[id]:
                return False
        return True

    def findCandidateText(self, k):
        # Rank terms by mutual information with each cluster and keep the top k
        # terms as that cluster's descriptive label.
        terms = []
        m = [[] for x in range(len(self.centroidList))]
        for d in self.docVector.values():
            for t in d.dict.keys():
                if t not in terms:
                    terms.append(t)
                    for c in range(len(self.centroidList)):
                        m[c].append(self.I(t, c))
        result = []
        for d in m:
            z = list(zip(d, terms))
            z.sort(key=lambda x: x[0], reverse=True)
            # d.sort(reverse=True)
            result.append(list(map(lambda x: x[1], z[:k])))
        return result

    def I(self, term, cluster):
        # Mutual information between "term occurs in document" and
        # "document belongs to cluster".
        n = len(self.docVector)
        n00 = n10 = n11 = n01 = 0
        for id in self.docVector:
            if self.docCluster[id] == cluster:
                if term in self.docVector[id].dict.keys():
                    n11 += 1
                else:
                    n01 += 1
            else:
                if term in self.docVector[id].dict.keys():
                    n10 += 1
                else:
                    n00 += 1
        n1_ = n10 + n11
        n_1 = n01 + n11
        n0_ = n00 + n01
        n_0 = n00 + n10
        a1 = n11 / n * log2(n * n11 / (n1_ * n_1)) if n11 != 0 else 0
        a2 = n01 / n * log2(n * n01 / (n0_ * n_1)) if n01 != 0 else 0
        a3 = n10 / n * log2(n * n10 / (n1_ * n_0)) if n10 != 0 else 0
        a4 = n00 / n * log2(n * n00 / (n0_ * n_0)) if n00 != 0 else 0
        return a1 + a2 + a3 + a4

    def clusterDocs(self):
        api = TermVectorAPI(ELASTIC_URL)
        # Read every document and fetch its term vector from Elasticsearch.
        for file in map(lambda x: os.path.join(CLUSTER_SOURCE_DIRECTORY, x),
                        list_files(CLUSTER_SOURCE_DIRECTORY, '*.json')):
            with open(file, 'r') as readFile:
                doc = json.load(readFile)
                self.docsJson[doc['id']] = doc
                self.docVector[doc['id']] = Vector(
                    api.get_term_vector(INDEX_NAME, DOCUMENT_TYPE, doc['id']))

        self.initCentroid(CLUSTER_NUM)
        # Standard k-means loop: assign documents, update centroids, repeat
        # until the assignment stops changing.
        while True:
            self.oldDocCluster = self.docCluster.copy()
            self.docCluster = {}
            for docID in self.docsJson.keys():
                self.docCluster[docID] = self.nearestCentroid(docID)
            self.updateCentroid()
            self.progress_bar.next()
            if (self.terminateCondition()):
                self.progress_bar.finish()
                break

        print('K = ', CLUSTER_NUM, ' J = ', self.J())
        candids = self.findCandidateText(CLUSTER_CANDIDATE_TEXT_LEN)
        c = [[] for x in range(len(self.centroidList))]
        for d in self.docCluster.keys():
            c[self.docCluster[d]].append(d)

        # Save the cluster summaries and the documents tagged with their cluster.
        os.makedirs(CLUSTER_DESTINATION_DIRECTORY, exist_ok=True)
        os.makedirs(CLUSTER_CANDIDATE_TEXT_DIRECTORY, exist_ok=True)
        for i in range(len(self.centroidList)):
            res = {}
            res['id'] = i
            res['name'] = candids[i]
            res['pages'] = c[i]
            fileName = i.__str__() + '.json'
            print('Cluster {}: {}\tnumber of docs: {}'.format(
                i, ' '.join(candids[i]), len(c[i])))
            with open(os.path.join(CLUSTER_CANDIDATE_TEXT_DIRECTORY, fileName),
                      'w') as outfile:
                json.dump(res, outfile)
        for id in self.docsJson.keys():
            self.docsJson[id]['cluster'] = self.docCluster[id]
            file_name = '{}.json'.format(id)
            with open(os.path.join(CLUSTER_DESTINATION_DIRECTORY, file_name),
                      'w') as outfile:
                json.dump(self.docsJson[id], outfile)
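# The Vector class used by K_means is not shown in this snippet. Judging from
# the call sites it wraps a sparse term-to-weight dict and exposes sim() and
# distance2(). A minimal sketch under those assumptions; cosine similarity and
# squared Euclidean distance are guesses at the intended semantics:
from math import sqrt


class Vector:

    def __init__(self, d=None):
        self.dict = d if d is not None else {}

    def sim(self, other):
        # Cosine similarity over the shared terms.
        dot = sum(w * other.dict.get(t, 0.0) for t, w in self.dict.items())
        norm_a = sqrt(sum(w * w for w in self.dict.values()))
        norm_b = sqrt(sum(w * w for w in other.dict.values()))
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)

    def distance2(self, other):
        # Squared Euclidean distance over the union of terms.
        terms = set(self.dict) | set(other.dict)
        return sum((self.dict.get(t, 0.0) - other.dict.get(t, 0.0)) ** 2
                   for t in terms)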
# Evaluate the trained naive Bayes model on the held-out id range [start, end]
# of data3. Uses the same imports and helpers as count_probs() above, plus a
# find_max() helper (sketched after this function).
def test_naive_bayes(start, end):
    try:
        dbconfig = read_db_config()
        conn = MySQLConnection(**dbconfig)
        cursor = conn.cursor(buffered=True)

        # Prior counts: number of documents per class, one row per class.
        cursor.execute(
            "select count(id) as total from data3 group by class order by class")
        total_class = cursor.fetchall()
        joy = float(total_class[0][0])
        fear = float(total_class[1][0])
        anger = float(total_class[2][0])
        sad = float(total_class[3][0])
        disgust = float(total_class[4][0])
        shame = float(total_class[5][0])

        # Distinct-word totals per class, in the order joy, fear, anger,
        # sadness, disgust, shame (used for unseen-word smoothing).
        cursor.execute("SELECT words FROM meta_class")
        class_word = cursor.fetchall()
        word_each_class = [
            class_word[0][0], class_word[1][0], class_word[2][0],
            class_word[3][0], class_word[4][0], class_word[5][0]
        ]

        class_list = ['1', '2', '3', '4', '5', '6']
        prob_columns = [
            'joy_probs', 'fear_probs', 'anger_probs', 'sadness_probs',
            'disgust_probs', 'shame_probs'
        ]

        # Select the held-out test rows.
        if start == 1:
            cursor.execute("SELECT * FROM data3 WHERE id <= %(id_target)s",
                           {'id_target': end})
        elif end == 7433:
            cursor.execute("SELECT * FROM data3 WHERE id >= %(id_target)s",
                           {'id_target': start})
        else:
            cursor.execute(
                "SELECT * FROM data3 WHERE id >= %(id_start)s and id <= %(id_end)s",
                {'id_start': start, 'id_end': end})
        results = cursor.fetchall()
        results_amount = len(results)
        true_amount = 0
        print(results_amount)

        cursor.execute("SELECT * from data3")
        print("start :: " + str(start))
        print("end :: " + str(end))
        total_data = len(cursor.fetchall())
        print(total_data)
        total_data = total_data - results_amount  # number of training rows

        # Class priors, same order as prob_columns and class_list.
        prior_probs = [
            joy / total_data, fear / total_data, anger / total_data,
            sad / total_data, disgust / total_data, shame / total_data
        ]

        pie = PieSpinner('\nTesting Naive Bayes :: ')
        for res in results:
            pie.next()
            sentence = res[2].split(' ')
            # One running likelihood per class, same order as prob_columns.
            likelihoods = [1, 1, 1, 1, 1, 1]
            check = False
            for word in sentence:
                if word != " ":
                    check = True
                if check and len(word) > 2:
                    for i, column in enumerate(prob_columns):
                        cursor.execute(
                            "SELECT {} from dictionary WHERE word=%(target)s".format(column),
                            {'target': word})
                        probs_res = cursor.fetchone()
                        if cursor.rowcount > 0 and probs_res[0] > 0:
                            likelihoods[i] = likelihoods[i] * probs_res[0]
                        else:
                            # Unseen word: fall back to 1 / (words in class).
                            likelihoods[i] = likelihoods[i] * (
                                float(1) / float(word_each_class[i]))

            # Scale to reduce underflow; ordering matches class_list ('1'..'6').
            probs_list = [
                float(likelihoods[i]) * prior_probs[i] * 1000000000
                for i in range(6)
            ]
            prediction = find_max(probs_list)
            if res[1] == class_list[prediction]:
                true_amount += 1

        accuracy = (float(true_amount) / float(results_amount)) * 100
        print("Accuracy : {0:.4f}".format(accuracy))
        with open("naive_bayes_result.txt", "a") as out_file:
            out_file.write("\nTesting start from " + str(start) + " - " + str(end))
            out_file.write("\nAccuracy : {0:.4f}".format(accuracy))
            out_file.write("\nNumber of predicted :: " + str(true_amount) +
                           "/" + str(results_amount))
        pie.finish()
    except Error as e:
        print(e)
    finally:
        cursor.close()
        conn.close()
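# find_max() is called above but not defined in this snippet. From its use it
# returns the index of the largest probability; a minimal sketch under that
# assumption:
def find_max(probs):
    # Index of the maximum value in the list.
    return max(range(len(probs)), key=lambda i: probs[i])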