Example #1
def log_out():
    '''Endpoint that removes user's data.

        Body:
            cookie: Cookie of current user.

        '''
    cookie = request.form['cookie']
    # Get current user
    res = database.find_one('connections', {'_id': cookie})
    email_name = res['email_name']
    keep = res['keep']
    database.delete_one('connections', {'_id': cookie})

    # If the user doesn't want to keep their data, remove all of it.
    if keep != "yes":
        database.delete_one('messages', {'_id': email_name})
        database.delete_one('clusters', {'_id': email_name})
        database.delete_one('saveDictations', {'_id': email_name})
        database.delete_one('users', {'_id': email_name})
        out = os.path.join("./data", email_name)
        if os.path.exists(out):
            shutil.rmtree(out)

    return jsonify({'message': 'OK'})
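
The `database` module these endpoints rely on is not shown. Below is a minimal sketch of the interface implied by the calls in Examples #1, #3 and #4, assuming a MongoDB backend via pymongo; the connection URI and database name are placeholders, not taken from the source.

from pymongo import MongoClient

# Hypothetical connection details; adjust to the actual deployment.
_db = MongoClient('mongodb://localhost:27017')['app']

def find_one(collection, query):
    # Return the first document matching `query`, or None if none exists.
    return _db[collection].find_one(query)

def insert_one(collection, document):
    _db[collection].insert_one(document)

def delete_one(collection, query):
    _db[collection].delete_one(query)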
Example #2
def main():

	print(""" 
		WELCOME TO STUDENTS DATABASE
		############################
		Select Operation :
		(1) to create a table
		(2) to add a record
		(3) to show all records
		(4) to delete a record
		(5) to select records
		""")
	operation = input("Enter your Choice : ")
	if operation == "2":
		database.add_record()
	elif operation == "3":
		database.show_all()
	elif operation == "4":
		database.delete_one(id)
	elif operation == "5":
		database.select()
	elif operation == "1":

		print("Please Contact DataBase Admonistrator for this Operation")
	else:
		print("Try again !!")
Example #3
def getEmails():
    '''Endpoint that returns the sent emails of a Gmail user.

        Body:
            cookie: Cookie of current user.
            token: Authentication token from Gmail API.
            keep: If true, keep the current user's data after logout.
        '''

    data = request.form
    token = data['token']
    cookie = data['cookie']
    keep = data['keep']
    # Get user's info and save a new connection in the database.
    email_name, name, picture = getInfo(token)
    database.insert_one('connections', {
        '_id': cookie,
        'email_name': email_name,
        'keep': keep
    })

    # If the user's data already exist, return the stored emails.
    res = database.find_one('users', {'_id': email_name})
    if res is not None:
        res = database.find_one('messages', {'_id': email_name})
        if res is not None:
            messages = res['messages']
            return jsonify(messages)
        else:
            # In case the data were not synchronized correctly.
            database.delete_one('users', {'_id': email_name})

    database.insert_one('users', {
        '_id': email_name,
        'name': name,
        'picture': picture
    })
    # Send a GET request to the Gmail API to list the user's sent emails.
    # The Gmail API expects the userId ('me' for the authorized user) in
    # the URL path rather than as a query parameter.
    read_endpoint = "https://www.googleapis.com/gmail/v1/users/me/messages"
    headers = {
        'Authorization': 'Bearer ' + token,
        'Accept': 'application/json'
    }
    read_response = requests.get(read_endpoint,
                                 headers=headers,
                                 params={'labelIds': ['SENT']})
    messages = read_response.json().get('messages', [])
    clean_messages = []
    for message in messages:
        # Get the full message based on its id (the id also goes in the path).
        get_endpoint = ("https://www.googleapis.com/gmail/v1/users/me/messages/"
                        + message['id'])
        get_response = requests.get(get_endpoint,
                                    headers=headers,
                                    params={'format': 'raw'})
        raw_msg = get_response.json().get("raw")
        string_message = str(base64.urlsafe_b64decode(raw_msg), "ISO-8859-7")
        # Convert current message to mime format.
        mime_msg = email.message_from_string(string_message)
        # Convert current message from mime to string.
        body, msg_headers = mime2str(mime_msg)
        # Clean the current message.
        processed_body = process_text(body)
        size = len(msg_headers)
        clean_messages.append({
            'body': body,
            'processed_body': processed_body,
            'sender': (msg_headers[0] if size > 0 else " "),
            'subject': (msg_headers[2] if size > 2 else " ")
        })

    # Save user's emails in database and return them.
    database.insert_one('messages', {
        '_id': email_name,
        'messages': clean_messages
    })
    return jsonify(clean_messages)
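
The `mime2str` helper called above is defined elsewhere in the project. Here is a sketch consistent with how it is used: the body is decoded with the same ISO-8859-7 charset as the raw message, and the header order (sender at index 0, subject at index 2) is inferred from the indexing in `getEmails`, not confirmed by the source.

def mime2str(mime_msg):
    '''Return the plain-text body and a list of headers of a MIME message.'''
    body = ""
    if mime_msg.is_multipart():
        # Walk the MIME tree and keep the first text/plain part as the body.
        for part in mime_msg.walk():
            if part.get_content_type() == 'text/plain':
                body = part.get_payload(decode=True).decode(
                    'ISO-8859-7', errors='replace')
                break
    else:
        body = mime_msg.get_payload(decode=True).decode(
            'ISO-8859-7', errors='replace')
    # Index 0 -> sender, index 2 -> subject, matching the caller above.
    headers = [mime_msg.get('From', ''),
               mime_msg.get('To', ''),
               mime_msg.get('Subject', '')]
    return body, headers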
Example #4
def getClusters():
    '''Endpoint that clusters the emails.

        Body:
            cookie: Cookie of current user.
            metric: Metric to be used for closest point calculation.
            n_clusters: Number of clusters.
            method: Method of selecting the number of clusters (elbow, silhouette).
            min_cl: Min number of clusters.
            max_cl: Max number of clusters.
            level: Level of clustering (per sentence or per email).
        '''
    data = request.form
    cookie = data['cookie']
    metric = data['metric']
    n_clusters = data['n_clusters']
    method = data['method']
    min_cl = int(data['min_cl'])
    max_cl = int(data['max_cl'])
    level = data['level']

    # Get current user.
    res = database.find_one('connections', {'_id': cookie})
    email_name = res['email_name']

    # Get messages of current user.
    res = database.find_one('messages', {'_id': email_name})
    messages_col = res['messages']
    # Keep them as sentences if asked to.
    emails = []
    for msg in messages_col:
        if level == "sentence":
            emails.extend(msg['processed_body'])
        else:
            emails.append(" ".join(msg['processed_body']))

    # Represent them as vectors.
    X = get_spacy(emails, nlp)

    if n_clusters == "":
        # Get metrics in different number of clusters (range [min_cl, max_cl]).
        sse, silhouette = get_metrics(X, min_cl, max_cl)
        if method == 'elbow':
            n_clusters = find_knee(sse, min_cl)
        else:
            n_clusters = silhouette_analysis(silhouette, min_cl)
    # Run k-means with given number of clusters.
    n_clusters = int(n_clusters)
    labels, centers = run_kmeans(X, n_clusters)

    # Save the computed clusters in the filesystem.
    out = os.path.join('./data', email_name, 'clusters')
    save_clusters(emails, labels, os.path.join(email_name, 'clusters'))
    cluster2text(out, n_clusters)

    # Get a sample for each cluster.
    samples = []
    for i in range(n_clusters):
        samples.append(emails[closest_point(centers[i], X, metric)])

    # We want to keep some representative words for each cluster
    # in order to identify the topic it represents, so we take
    # the words with the highest tf-idf score in each cluster.
    cv = CountVectorizer(stop_words=STOP_WORDS)
    tfidf = TfidfTransformer(smooth_idf=True, use_idf=True)
    keywords_total = []
    for i in range(n_clusters):
        emails_cluster = [
            emails[j] for j in range(len(emails)) if labels[j] == i
        ]
        word_count_vector = cv.fit_transform(emails_cluster)
        tfidf.fit(word_count_vector)
        feature_names = cv.get_feature_names()  # get_feature_names_out() in newer scikit-learn
        tf_idf_vector = tfidf.transform(cv.transform(emails_cluster))
        sorted_items = sort_coo(tf_idf_vector.tocoo())
        keywords = extract_topn_from_vector(feature_names, sorted_items, 10)
        keywords_total.append(keywords)

    # Delete previous user's clustering.
    database.delete_one('clusters', {'_id': email_name})
    # Insert computed clusters in database.
    database.insert_one(
        'clusters', {
            '_id': email_name,
            'centers': centers.tolist(),
            'labels': labels.tolist(),
            'samples': samples,
            'keywords': keywords_total,
            'metric': metric
        })

    clusters = [[] for _ in range(n_clusters)]
    # Use `msg` rather than `email` to avoid shadowing the stdlib email module.
    for idx, msg in enumerate(emails):
        clusters[labels[idx]].append(msg)

    weight = '0.5'
    # Create language models using SRILM.
    for cluster in os.listdir(out):
        cluster_path = os.path.join(out, cluster)
        if os.path.isdir(cluster_path):
            # Train an n-gram model on the cluster's corpus.
            train_cmd = ('ngram-count -kndiscount -interpolate -text '
                         + os.path.join(cluster_path, 'corpus')
                         + ' -wbdiscount1 -wbdiscount2 -wbdiscount3 -lm '
                         + os.path.join(cluster_path, 'model.lm'))
            if subprocess.call(train_cmd, shell=True):
                print('Error in subprocess')
            # Interpolate the cluster model with the base language model.
            mix_cmd = ('ngram -lm ' + lmPath + ' -mix-lm '
                       + os.path.join(cluster_path, 'model.lm')
                       + ' -lambda ' + weight + ' -write-lm '
                       + os.path.join(cluster_path, 'merged.lm'))
            if subprocess.call(mix_cmd, shell=True):
                print('Error in subprocess')

    return jsonify({
        'samples': samples,
        'keywords': keywords_total,
        'clusters': clusters
    })
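
Several helpers used above (`sort_coo`, `extract_topn_from_vector`, `closest_point`) are defined elsewhere in the project. Minimal sketches matching their call signatures, with return types inferred from usage, might look like this:

from scipy.spatial.distance import cdist

def sort_coo(coo_matrix):
    # Sort the (column index, tf-idf score) pairs of a sparse matrix
    # by descending score.
    tuples = zip(coo_matrix.col, coo_matrix.data)
    return sorted(tuples, key=lambda x: (x[1], x[0]), reverse=True)

def extract_topn_from_vector(feature_names, sorted_items, topn=10):
    # Map the topn highest-scoring column indices back to vocabulary words.
    return [feature_names[idx] for idx, score in sorted_items[:topn]]

def closest_point(center, X, metric):
    # Index of the row of X nearest to `center` under the given metric.
    return cdist([center], X, metric=metric).argmin()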
Example #5
def delete_row(self, table, designation):
    database.delete_one(self.conn, table, designation)
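
A hypothetical `database.delete_one` consistent with this wrapper's signature is sketched below; note that SQL table names cannot be bound as parameters, so `table` must come from trusted code.

def delete_one(conn, table, designation):
    # `table` is interpolated directly (not user-supplied); `designation`
    # is bound as a parameter.
    with conn:
        conn.execute("DELETE FROM {} WHERE designation = ?".format(table),
                     (designation,))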
Example #6
import database

# Lookup email record
database.email_lookup("*****@*****.**")

# Delete one record
database.delete_one('4')

# Add one record
database.add_one("Rafael", "Magalhaes", "*****@*****.**")

# Add many records
lista = [("Remy", "Lacroix", "*****@*****.**"),
         ("Lexi", "Belle", "*****@*****.**")]
database.add_many(lista)

# Show all records
database.show_all()
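
A sketch of the `database` module this script imports, assuming a sqlite3 contact table; the file name and column names are guesses consistent with the calls above.

import sqlite3

conn = sqlite3.connect('contacts.db')  # hypothetical file name
conn.execute("CREATE TABLE IF NOT EXISTS contacts "
             "(first_name TEXT, last_name TEXT, email TEXT)")

def email_lookup(email):
    for row in conn.execute(
            "SELECT rowid, * FROM contacts WHERE email = ?", (email,)):
        print(row)

def add_one(first_name, last_name, email):
    with conn:
        conn.execute("INSERT INTO contacts VALUES (?, ?, ?)",
                     (first_name, last_name, email))

def add_many(records):
    # `records` is a list of (first_name, last_name, email) tuples.
    with conn:
        conn.executemany("INSERT INTO contacts VALUES (?, ?, ?)", records)

def delete_one(rowid):
    with conn:
        conn.execute("DELETE FROM contacts WHERE rowid = ?", (rowid,))

def show_all():
    for row in conn.execute("SELECT rowid, * FROM contacts"):
        print(row)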