def log_out():
    """Endpoint that removes the current user's session and, optionally, data.

    Body:
        cookie: Cookie of current user.
    """
    cookie = request.form['cookie']
    # Look up the connection for this cookie to find which user it belongs to.
    res = database.find_one('connections', {'_id': cookie})
    email_name = res['email_name']
    keep = res['keep']
    database.delete_one('connections', {'_id': cookie})
    # If the user did not ask to keep their data, remove all of it
    # (idiom fix: `keep != "yes"` instead of `not keep == "yes"`).
    if keep != "yes":
        database.delete_one('messages', {'_id': email_name})
        database.delete_one('clusters', {'_id': email_name})
        database.delete_one('saveDictations', {'_id': email_name})
        database.delete_one('users', {'_id': email_name})
        # Also remove any files cached on disk for this user.
        out = os.path.join("./data", email_name)
        if os.path.exists(out):
            shutil.rmtree(out)
    return jsonify({'message': 'OK'})
def main():
    """Text-menu entry point for the students database script.

    Shows the operation menu once, reads a choice from stdin, and dispatches
    to the matching `database` helper.
    """
    print("""
    WELCOME TO STUDENTS DATABASE
    ############################
    Select Operation :
    (1) for create Table
    (2) for Add Record
    (3) for Show Records
    (4) for Delete Records
    (5) for Records Selection
    """)
    operation = input("Enter your Choice : ")
    if operation == "2":
        database.add_record()
    elif operation == "3":
        database.show_all()
    elif operation == "4":
        # BUG FIX: the original called database.delete_one(id), passing the
        # *builtin* `id` function instead of a record id. Ask the user which
        # record to delete (delete_one takes a string id, e.g. '4').
        record_id = input("Enter record id : ")
        database.delete_one(record_id)
    elif operation == "5":
        database.select()
    elif operation == "1":
        # Typo fix in the user-facing message ("Admonistrator").
        print("Please Contact DataBase Administrator for this Operation")
    else:
        print("Try again !!")
def getEmails():
    """Endpoint that returns the sent emails of a Gmail user.

    Body:
        cookie: Cookie of current user.
        token: Authentication token from Gmail API.
        keep: If true, keep current user's data after log out.
    """
    data = request.form
    token = data['token']
    cookie = data['cookie']
    keep = data['keep']

    # Get user's info and save a new connection in the database.
    email_name, name, picture = getInfo(token)
    database.insert_one('connections', {
        '_id': cookie,
        'email_name': email_name,
        'keep': keep
    })

    # If the user's data already exist, return the cached emails.
    res = database.find_one('users', {'_id': email_name})
    if res is not None:
        res = database.find_one('messages', {'_id': email_name})
        if res is not None:
            return jsonify(res['messages'])
        # Data were not synchronized correctly: drop the stale user record
        # and fall through to re-fetch everything from the Gmail API.
        database.delete_one('users', {'_id': email_name})

    database.insert_one('users', {
        '_id': email_name,
        'name': name,
        'picture': picture
    })

    # List the user's SENT messages via the Gmail API.
    # NOTE(review): the literal "userId" path segment looks wrong — the
    # documented endpoint is .../users/me/messages; confirm against the API.
    read_endpoint = "https://www.googleapis.com/gmail/v1/users/userId/messages"
    headers = {
        'Authorization': 'Bearer ' + token,
        'Accept': 'application/json'
    }
    read_response = requests.get(read_endpoint,
                                 headers=headers,
                                 params={
                                     'userId': 'me',
                                     'labelIds': ['SENT']
                                 })
    # ROBUSTNESS FIX: the response may have no 'messages' key (e.g. the user
    # has no sent mail), in which case .get() returns None and the original
    # loop crashed. Treat that as an empty list.
    messages = read_response.json().get('messages') or []

    clean_messages = []
    for message in messages:
        # Fetch the raw message content by id.
        get_endpoint = "https://www.googleapis.com/gmail/v1/users/userId/messages/id"
        get_response = requests.get(get_endpoint,
                                    headers=headers,
                                    params={
                                        'userId': 'me',
                                        'id': message['id'],
                                        'format': 'raw'
                                    })
        raw_msg = get_response.json().get("raw")
        # Decode the base64url payload as Greek ISO-8859-7 text.
        string_message = str(base64.urlsafe_b64decode(raw_msg), "ISO-8859-7")
        # Parse the raw text into a MIME message object.
        mime_msg = email.message_from_string(string_message)
        # Flatten the MIME message into body text plus selected headers.
        body, msg_headers = mime2str(mime_msg)
        # Clean/normalize the body text.
        proccesed_body = process_text(body)
        size = len(msg_headers)
        clean_messages.append({
            'body': body,
            'processed_body': proccesed_body,
            'sender': (msg_headers[0] if size > 0 else " "),
            'subject': (msg_headers[2] if size > 2 else " ")
        })

    # Cache the cleaned emails in the database and return them.
    database.insert_one('messages', {
        '_id': email_name,
        'messages': clean_messages
    })
    return jsonify(clean_messages)
def getClusters():
    """Endpoint that clusters the emails.

    Body:
        cookie: Cookie of current user.
        metric: Metric to be used for closest point calculation.
        n_clusters: Number of clusters ("" to auto-select).
        method: Method of selecting number of clusters to be used
            (knee, silhouette).
        min_cl: Min number of clusters.
        max_cl: Max number of clusters.
        level: Level of clustering (per sentence or per email).
    """
    data = request.form
    cookie = data['cookie']
    metric = data['metric']
    n_clusters = data['n_clusters']
    method = data['method']
    min_cl = int(data['min_cl'])
    max_cl = int(data['max_cl'])
    level = data['level']

    # Get current user.
    res = database.find_one('connections', {'_id': cookie})
    email_name = res['email_name']

    # Get messages of current user.
    res = database.find_one('messages', {'_id': email_name})
    messages_col = res['messages']

    # Build the documents to cluster: one per sentence or one per email.
    emails = []
    for msg in messages_col:
        if level == "sentence":
            emails.extend(msg['processed_body'])
        else:
            emails.append(" ".join(msg['processed_body']))

    # Represent the documents as vectors.
    X = get_spacy(emails, nlp)

    if n_clusters == "":
        # Evaluate metrics over [min_cl, max_cl] and pick a cluster count.
        sse, silhouette = get_metrics(X, min_cl, max_cl)
        if method == 'elbow':
            n_clusters = find_knee(sse, min_cl)
        else:
            n_clusters = silhouette_analysis(silhouette, min_cl)

    # Run k-means with the chosen number of clusters.
    n_clusters = int(n_clusters)
    labels, centers = run_kmeans(X, n_clusters)

    # Save computed clusters in the filesystem.
    out = os.path.join('./data', os.path.join(email_name, 'clusters'))
    save_clusters(emails, labels, os.path.join(email_name, 'clusters'))
    cluster2text(out, n_clusters)

    # Get a representative sample (closest point to the center) per cluster.
    samples = []
    for i in range(n_clusters):
        samples.append(emails[closest_point(centers[i], X, metric)])

    # Keep some representative words per cluster to identify its topic:
    # take the words with the highest tf-idf score in each cluster.
    cv = CountVectorizer(stop_words=STOP_WORDS)
    tfidf = TfidfTransformer(smooth_idf=True, use_idf=True)
    keywords_total = []
    for i in range(n_clusters):
        emails_cluster = [
            emails[j] for j in range(len(emails)) if labels[j] == i
        ]
        word_count_vector = cv.fit_transform(emails_cluster)
        tfidf.fit(word_count_vector)
        # NOTE(review): get_feature_names() was removed in scikit-learn 1.2;
        # switch to get_feature_names_out() when upgrading sklearn.
        feature_names = cv.get_feature_names()
        tf_idf_vector = tfidf.transform(cv.transform(emails_cluster))
        sorted_items = sort_coo(tf_idf_vector.tocoo())
        keywords = extract_topn_from_vector(feature_names, sorted_items, 10)
        keywords_total.append(keywords)

    # Replace any previous clustering for this user in the database.
    database.delete_one('clusters', {'_id': email_name})
    database.insert_one(
        'clusters', {
            '_id': email_name,
            'centers': centers.tolist(),
            'labels': labels.tolist(),
            'samples': samples,
            'keywords': keywords_total,
            'metric': metric
        })

    # Group the documents by their assigned cluster label.
    # BUG FIX: the original loop variable was named `email`, shadowing the
    # stdlib `email` module used elsewhere in this file.
    clusters = [[] for _ in range(n_clusters)]
    for idx, text in enumerate(emails):
        clusters[labels[idx]].append(text)

    weight = '0.5'
    # Create language models using SRILM.
    # SECURITY FIX: pass argument lists (shell=False) instead of building a
    # shell command string from filesystem paths, so paths with spaces or
    # shell metacharacters cannot alter the command.
    for cluster in os.listdir(out):
        cluster_path = os.path.join(out, cluster)
        if os.path.isdir(cluster_path):
            if subprocess.call([
                    'ngram-count', '-kndiscount', '-interpolate',
                    '-text', os.path.join(cluster_path, 'corpus'),
                    '-wbdiscount1', '-wbdiscount2', '-wbdiscount3',
                    '-lm', os.path.join(cluster_path, 'model.lm')
            ]):
                print('Error in subprocess')
            if subprocess.call([
                    'ngram', '-lm', lmPath,
                    '-mix-lm', os.path.join(cluster_path, 'model.lm'),
                    '-lambda', weight,
                    '-write-lm', os.path.join(cluster_path, 'merged.lm')
            ]):
                print('Error in subprocess')

    return jsonify({
        'samples': samples,
        'keywords': keywords_total,
        'clusters': clusters
    })
def delete_row(self, table, designation):
    """Delete the record matching *designation* from *table*.

    Delegates to the shared ``database`` helper using this object's
    open connection.
    """
    database.delete_one(self.conn, table, designation)
import database

# Look up a single email record.
database.email_lookup("*****@*****.**")

# Remove the record with id '4'.
database.delete_one('4')

# Insert one record.
database.add_one("Rafael", "Magalhaes", "*****@*****.**")

# Insert several records in one call.
lista = [
    ("Remy", "Lacroix", "*****@*****.**"),
    ("Lexi", "Belle", "*****@*****.**"),
]
database.add_many(lista)

# Print every record in the table.
database.show_all()