Exemple #1
0
    def train(self, emails, w2v, epochs=10, save_model=True):
        """Fit the model on sender/receiver pairs and export user embeddings.

        For every valid (sender, receiver) pair of every email, the output of
        ``self.forward`` is regressed (MSE loss, RMSprop updates) onto the
        representation ``w2v.get_email_reps`` returned for that email. After
        the last epoch the model is optionally saved, then the user embeddings
        are extracted, saved and plotted.

        :param emails: sized 2-D indexable, read as ``emails[i, column]`` for
            the ``constants.SENDER_EMAIL`` and ``constants.RECEIVER_EMAILS``
            columns; receivers are separated by ``'|'``.
        :param w2v: object exposing ``get_email_reps(emails, average=True)``;
            an entry of ``None`` marks an email without a representation.
        :param epochs: number of passes over ``emails``.
        :param save_model: when true, save the model to
            ``constants.RUN_ID + '_model.pth'`` after training.
        """
        loss_criteria = nn.MSELoss()
        optimizer = optim.RMSprop(self.parameters(),
                                  lr=0.0001,
                                  alpha=0.99,
                                  momentum=0.0)
        email_reps = w2v.get_email_reps(emails, average=True)

        for epoch in range(epochs):
            # A single pre-formatted argument prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print('running epoch  %s' % epoch)
            start = time.time()
            epoch_loss = 0.0
            for i in range(len(emails)):
                sender_email = emails[i, constants.SENDER_EMAIL]
                sender_id = utils.get_userid(sender_email)
                email_rep = email_reps[i]
                # no representation was produced for this email: skip it
                if email_rep is None:
                    continue
                # the sender could not be resolved to a user id, so no pair
                # of this email is usable
                if sender_id is None:
                    continue

                # Loop-invariant inputs, built once per email instead of once
                # per receiver.
                sender_var = autograd.Variable(torch.LongTensor([sender_id]))
                target = autograd.Variable(torch.from_numpy(email_rep))

                for recv in emails[i, constants.RECEIVER_EMAILS].split('|'):
                    recv_id = utils.get_userid(recv)
                    # receiver could not be resolved: ignore this pair
                    if recv_id is None:
                        continue
                    # a valid pair was found: update both frequency counts
                    self.emailid_train_freq[sender_email] = \
                        self.emailid_train_freq.get(sender_email, 0) + 1
                    self.emailid_train_freq[recv] = \
                        self.emailid_train_freq.get(recv, 0) + 1

                    optimizer.zero_grad()
                    # forward pass
                    pred_email_rep = self.forward(
                        sender_var,
                        autograd.Variable(torch.LongTensor([recv_id])))
                    loss = loss_criteria(pred_email_rep, target)
                    # backpropagate and apply the gradient step
                    loss.backward()
                    optimizer.step()
                    epoch_loss += loss.data.numpy()
            end = time.time()
            print('time taken  %s' % (end - start))
            print('loss in epoch ' + str(epoch) + ' = ' + str(epoch_loss))

        if save_model:
            file_name = constants.RUN_ID + '_model.pth'
            self.save(file_name)
        email_ids, embs = self.extract_user_embeddings()
        utils.save_user_embeddings(email_ids, embs)
        plots.plot_with_tsne(email_ids, embs, display_hover=False)
Exemple #2
0
def main(args):
    """Update the curators of the pulsar named by ``args.psrname``.

    With ``args.remove_all`` set, ``clear_curators`` is called for the pulsar.
    Otherwise the usernames in ``args.to_add`` and ``args.to_remove`` are
    resolved with ``utils.get_userid`` and passed to ``update_curators``;
    a ``None`` entry is appended to a list when the matching ``add_wild`` /
    ``remove_wild`` flag is set (presumably a wildcard marker -- confirm
    against ``update_curators``).
    """
    pulsar = utils.get_pulsarid(args.psrname)
    if args.remove_all:
        clear_curators(pulsar)
        return

    additions = list(map(utils.get_userid, args.to_add))
    additions += [None] if args.add_wild else []

    removals = list(map(utils.get_userid, args.to_remove))
    removals += [None] if args.remove_wild else []

    update_curators(pulsar, additions, removals)
Exemple #3
0
def create_post_view():
    """Render the post editor, or create a post from the query string.

    Without a user id the login page is rendered. With an empty query string
    the editor template is rendered. Otherwise the query arguments, extended
    with ``is_request``, ``length`` (from ``duration``), ``date`` (from
    ``start_date``) and ``skill_set`` (from ``skillset``), are passed to
    ``Account.create_post`` and the client is redirected to ``/posts``.
    """
    uid = get_userid()
    if not uid:
        print("USER ID WAS BAD")
        return render_template("login.html")
    account = Account.init_from_uuid(uid)

    query = request.args
    print(query)
    print(request.form)
    if not query:
        return render_template("edit_post.html")

    fields = dict(query)
    fields.update({
        'is_request': True,
        'length': fields.get('duration'),
        'date': fields.get('start_date'),
        'skill_set': fields.get('skillset', []),
    })

    # The dict form of the new post is computed but not used afterwards.
    account.create_post(**fields).to_dict()

    return redirect("/posts")
Exemple #4
0
def create_new_post():
    """Create a post for the current user from the request's JSON body.

    The JSON object is passed as keyword arguments to ``Account.create_post``
    after adding ``is_request``, ``length`` (from ``duration``), ``date``
    (from ``start_date``), ``skill_set`` (from ``skillset``) and ``tags``
    (the comma-separated ``tags`` string split into a list).

    Returns a JSON string: the created post's dict plus ``status: success``,
    or ``{"status": "failure"}`` when no user id is available or the body is
    not a JSON object.
    """
    user_id = get_userid()
    if not user_id:
        print("USER ID WAS BAD")
        return json.dumps({"status": "failure"})

    acc = Account.init_from_uuid(user_id)

    payload = request.json
    # The parsed body may be missing or not a JSON object (e.g. a list);
    # dict() on such a value would raise, so answer with the failure payload.
    if not isinstance(payload, dict):
        return json.dumps({"status": "failure"})
    res = dict(payload)
    print(res)

    res['is_request'] = True
    res['length'] = res.get('duration')
    res['date'] = res.get('start_date')
    res['skill_set'] = res.get('skillset', [])
    res['tags'] = res.get('tags', "").split(',')

    post = acc.create_post(**res)
    print("POSTS CREATE NEW!")
    post = post.to_dict()
    print(post)
    post['status'] = 'success'

    return json.dumps(post)
Exemple #5
0
def add_member():
    """Send a group-membership invitation to a user.

    Reads ``user_id`` (or ``user_username``) and ``group_id`` from the posted
    form. Responds with a failing ``Validity`` when the username is reported
    by ``utils.validate_username`` (treated here as "does not exist"), the
    user id or group id is invalid, an invitation is already pending, or the
    user is already a member. Otherwise the request is added to the group and
    committed.
    """
    form = {key: request.form[key].strip() for key in request.form}
    if 'user_id' not in form:
        assert 'user_username' in form
        username = form['user_username']
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['user_id'] = utils.get_userid(username)

    user_id = int(form['user_id'])
    if not utils.validate_userid(user_id):
        return Validity(False, 'Invalid user.').get_resp()

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id=group_id):
        return Validity(False, 'Invalid group id').get_resp()
    if utils.validate_groupreqs(user_id, group_id):
        return Validity(False, 'Invitation already sent.').get_resp()
    if utils.validate_membership(user_id, group_id):
        return Validity(False, 'Already in the group').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    group.add_memberReq(user_id)
    db.session.commit()
    return Validity(True).get_resp()
Exemple #6
0
def signup():
    """Render the signup page, pre-filled when a valid token cookie is sent.

    Fields noted by the original author:
        email    (string)
        password (string) **not hashed yet!

    The plain ``signup.html`` is rendered when there is no cookie, when the
    cookie does not resolve to a user id that ``TokenTable.validate``
    accepts, or when no account is found for that id. Otherwise the template
    is rendered with ``logged_in=True``, the current user id and the
    account's fields.
    """
    # similar to the login route
    cookie = request.cookies.get(TOKEN_NAME)
    if not cookie:
        return render_template("signup.html")

    token_conn = TokenTable()
    user_id = token_conn.get_uuid(cookie)
    if not user_id or not token_conn.validate(user_id, cookie):
        return render_template("signup.html")

    # Load the account only once the token is known to be valid, and do not
    # dereference a missing account (grab_post treats a falsy account as a
    # failure as well).
    account = Account.init_from_uuid(user_id)
    if not account:
        return render_template("signup.html")

    return render_template("signup.html",
                           token_uuid=get_userid(),
                           logged_in=True,
                           **account.to_dict())
Exemple #7
0
 def authenticate(self, fb_code):
     """Return the user attached to the ``FBProfile`` matching ``fb_code``.

     ``fb_code`` is resolved with ``get_userid`` and the profile is looked up
     by ``fb_userid``. Returns ``None`` when the lookup raises ``ValueError``
     or ``ObjectDoesNotExist``.
     """
     try:
         return FBProfile.objects.get(fb_userid=get_userid(fb_code)).user
     except (ValueError, ObjectDoesNotExist):
         return None
    def predict(self, email, w2v, label=None, training_mode=False):
        """Run the model on one email and return a loss or class probabilities.

        :param email: indexable record read through ``constants.SENDER_EMAIL``,
            ``constants.EMAIL_BODY`` and the positional indices 1 and 2.
        :param w2v: object exposing ``get_sentence``.
        :param label: target passed to the cross-entropy loss in training mode.
        :param training_mode: when true return the loss, otherwise the softmax
            of the forward output.
        :return: ``(0, False)`` when the email is skipped (unknown sender,
            missing body, no word vectors, or no known receiver); otherwise
            ``(loss, True)`` or ``(probabilities, True)``.
        """
        criterion = nn.CrossEntropyLoss()

        sender_key = email[constants.SENDER_EMAIL]
        sender_id = utils.get_userid(sender_key)
        body = email[constants.EMAIL_BODY]
        # nothing to do without a known sender or without a body
        if sender_id is None or body is None:
            return 0, False

        # Mean of the word vectors of the mail, shaped as a single row.
        # NOTE(review): the body and receivers are read positionally
        # (email[2], email[1]) rather than through constants -- confirm the
        # indices match constants.EMAIL_BODY / constants.RECEIVER_EMAILS.
        word_vecs = np.array(w2v.get_sentence(email[2]))
        if not word_vecs.shape[0]:
            return 0, False
        email_rep = np.mean(word_vecs, axis=0).reshape(1, -1)

        counts = self.emailid_train_freq
        recv_ids = []
        for recv in email[1].split('|'):
            recv_id = utils.get_userid(recv)
            if recv_id is None:
                continue
            recv_ids.append(recv_id)
            counts[recv] = counts.get(recv, 0) + 1
        # no receiver could be resolved: skip this email
        if not recv_ids:
            return 0, False

        # the sender is used as well, so count it too
        counts[sender_key] = counts.get(sender_key, 0) + 1

        pred_out = self.forward(sender_id, recv_ids, email_rep)
        if not training_mode:
            return nn.Softmax()(pred_out), True
        return criterion(pred_out, label), True
Exemple #9
0
def add_to_posts():
    """Add the current user as a volunteer on the post named in the JSON body.

    Reads ``post_id`` from the request JSON, loads the post, adds the id
    returned by ``get_userid()`` as a volunteer, persists the post and renders
    ``posts.html``. The original author noted that filtering is planned here.
    """
    # The parsed JSON body is a mapping, so the id has to be read by key;
    # attribute access (``request.json.post_id``) raises AttributeError.
    post_id = request.json["post_id"]
    post = Post.init_from_uid(post_id)
    user_id = get_userid()
    # NOTE(review): unlike grab_post, neither the user id nor the result of
    # add_volunteer is checked here -- confirm whether that is intended.
    post.add_volunteer(user_id)
    post.update_in_db()

    return render_template("posts.html")
    def predict(self, email, w2v):
        """Compute the MSE loss of the predicted word vectors for one email.

        :param email: indexable record read through ``constants.SENDER_EMAIL``,
            ``constants.EMAIL_BODY`` and the positional index 1 (receivers,
            separated by ``'|'``).
        :param w2v: object exposing ``get_sentence``.
        :return: ``(0, False)`` when the email is skipped (unknown sender,
            missing body, no known receiver, or no word combinations);
            otherwise ``(loss, True)``.
        """
        criterion = nn.MSELoss()
        sender_key = email[constants.SENDER_EMAIL]
        sender_id = utils.get_userid(sender_key)
        body = email[constants.EMAIL_BODY]
        # nothing to do without a known sender or without a body
        if sender_id is None or body is None:
            return 0, False

        counts = self.emailid_train_freq
        recv_ids = []
        for recv in email[1].split('|'):
            recv_id = utils.get_userid(recv)
            if recv_id is None:
                continue
            recv_ids.append(recv_id)
            counts[recv] = counts.get(recv, 0) + 1
        # no receiver could be resolved: skip this email
        if not recv_ids:
            return 0, False

        # the sender is used as well, so count it too
        counts[sender_key] = counts.get(sender_key, 0) + 1

        # word representations of the body
        word_reps = w2v.get_sentence(body)
        # inputs built from neighbouring words and the word each should predict
        context_embs, target_embs = self.generate_all_combinations(word_reps)
        if len(target_embs) == 0:
            return 0, False

        predicted = self.forward(sender_id, recv_ids, context_embs)
        target = autograd.Variable(torch.from_numpy(target_embs))
        return criterion(predicted, target), True
Exemple #11
0
def grab_post():
    """Register the current user as a volunteer on the requested post.

    Returns ``FAIL_MSG`` when there is no user id, no account for it, or
    ``add_volunteer`` reports failure; otherwise persists the post and returns
    a JSON success status.
    """
    uid = get_userid()
    # The account is only looked up to confirm that it exists.
    if not uid or not Account.init_from_uuid(uid):
        return FAIL_MSG

    target = Post.init_from_uid(request.json["post_id"])
    accepted = target.add_volunteer(uid)
    if not accepted:
        return FAIL_MSG
    print("!!!!!!!!!!!!!!!!!!!!!!!!!")
    target.update_in_db()
    return json.dumps({"status": "success"})
Exemple #12
0
def deny_friendReqs():
    """Deny a pending friend request addressed to the current user.

    The requester is given as ``friend_id`` or ``friend_username`` in the
    posted form. Responds with a failing ``Validity`` when the username is
    reported by ``utils.validate_username`` (treated here as "does not
    exist") or when no matching request exists.
    """
    form = {key: request.form[key].strip() for key in request.form}
    if 'friend_id' not in form:
        assert 'friend_username' in form
        username = form['friend_username']
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['friend_id'] = utils.get_userid(username)

    friend_id = int(form['friend_id'])
    my_id = int(current_user.id)
    if not utils.validate_friendreqs(friend_id, my_id):
        return Validity(False, 'Request does not exist.').get_resp()

    requester = User.query.filter_by(id=friend_id).first()
    requester.deny_friendReq(my_id)
    db.session.commit()
    return Validity(True).get_resp()
Exemple #13
0
    def extract_user_embeddings(self, threshold=1):
        """
        Collect the embeddings of the users seen at least ``threshold`` times.

        :param threshold: minimum count in ``self.emailid_train_freq`` for an
            email id to be included.
        :return: ``(email_ids, embeddings)`` -- the selected email ids and a
            numpy array holding one flattened embedding per id, in the same
            order.
        """
        seen = self.emailid_train_freq
        email_ids = [e_id for e_id in utils.get_user_emails()
                     if seen.get(e_id, 0) >= threshold]

        def lookup(e_id):
            # embedding of a single user, flattened to one dimension
            index = autograd.Variable(
                torch.LongTensor([utils.get_userid(e_id)]))
            return self.embedding_layer(index).data.numpy().reshape(-1)

        return email_ids, np.array([lookup(e_id) for e_id in email_ids])
Exemple #14
0
def touch_timfile(timfile_id, existdb=None):
    """Stamp a timfile with the current user ID and the current time.

        The 'user_id' and 'add_time' columns of the matching row in
        db.timfiles are updated.

        Inputs:
            timfile_id: The ID of the timfile to touch.
            existdb: An (optional) existing database connection object.
                (Default: Establish a db connection)

        Outputs:
            None
    """
    db = existdb or database.Database()
    db.connect()

    try:
        values = {'user_id': utils.get_userid(),
                  'add_time': datetime.datetime.now()}
        update = db.timfiles.update().\
                    where(db.timfiles.c.timfile_id == timfile_id)
        results = db.execute(update, values)
        results.close()
    finally:
        # Close only a connection opened by this call, and do so even when
        # the update fails, so the connection is not leaked.
        if not existdb:
            db.close()
Exemple #15
0
def delete_member():
    """Remove a user from a group.

    Reads ``user_id`` (or ``user_username``) and ``group_id`` from the posted
    form. Responds with a failing ``Validity`` when the username is reported
    by ``utils.validate_username`` (treated here as "does not exist"), the
    group id is invalid, or the user is not a member or is reported by
    ``utils.validate_ownership`` for the group.
    """
    form = {key: request.form[key].strip() for key in request.form}
    if 'user_id' not in form:
        assert 'user_username' in form
        username = form['user_username']
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['user_id'] = utils.get_userid(username)

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id=group_id):
        return Validity(False, 'Invalid group id').get_resp()

    user_id = int(form['user_id'])
    is_member = utils.validate_membership(user_id, group_id)
    # ownership is only consulted for members, as in a short-circuit "and"
    if not is_member or utils.validate_ownership(user_id, group_id):
        return Validity(False, 'Can not quit the group').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    group.delete_member(user_id)
    db.session.commit()
    return Validity(True).get_resp()
Exemple #16
0
def add_friend():
    """Send a friend request from the current user to another user.

    The target is given as ``friend_id`` or ``friend_username`` in the posted
    form. Responds with a failing ``Validity`` when the username is reported
    by ``utils.validate_username`` (treated here as "does not exist"), the
    two users are already friends, or a request is already pending. Otherwise
    the request is recorded on the current user and committed.
    """
    form = {k: request.form[k].strip() for k in request.form}
    if 'friend_id' not in form:
        assert 'friend_username' in form
        if utils.validate_username(form['friend_username']):
            return Validity(
                False, 'User ' + form['friend_username'] +
                ' does not exist.').get_resp()
        form['friend_id'] = utils.get_userid(form['friend_username'])

    user_id = int(current_user.id)
    friend_id = int(form['friend_id'])
    if utils.validate_friendship(user_id, friend_id):
        # 'friend_username' is absent when the caller supplied 'friend_id'
        # directly; fall back to the id so that building the message does not
        # raise KeyError.
        friend_label = form.get('friend_username', str(form['friend_id']))
        return Validity(
            False, 'User ' + friend_label +
            ' has already been your friend.').get_resp()
    if utils.validate_friendreqs(user_id, friend_id):
        return Validity(False, 'Request already sent.').get_resp()

    current_user.add_friendReq(friend_id)
    db.session.commit()
    return Validity(True).get_resp()
Exemple #17
0
def update_group():
    """Update the name, owner and/or info of a group.

    Reads ``group_id`` plus the optional ``name``, ``info`` and ``owner_id``
    (or ``owner_username``) from the posted form; fields that are absent are
    passed to ``group.update`` as ``None``. Responds with a failing
    ``Validity`` when the owner username is reported by
    ``utils.validate_username`` (treated here as "does not exist"), the group
    id is invalid, or ``utils.validate_ownership`` rejects the current user.
    """
    form = {key: request.form[key].strip() for key in request.form}
    if 'owner_id' not in form and 'owner_username' in form:
        owner_name = form['owner_username']
        if utils.validate_username(owner_name):
            return Validity(
                False, 'User ' + owner_name + ' does not exist.').get_resp()
        form['owner_id'] = utils.get_userid(owner_name)

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id):
        return Validity(False, 'Invalid group id').get_resp()
    if not utils.validate_ownership(int(current_user.id), group_id):
        return Validity(False, 'No access').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    new_owner = int(form['owner_id']) if 'owner_id' in form else None
    group.update(name=form.get('name'),
                 owner_id=new_owner,
                 info=form.get('info'))
    db.session.commit()
    return Validity(True, group.get_info_map()).get_resp()
Exemple #18
0
def replace_rawfile(obsolete_id, replace_id, comments, existdb=None):
    """In the database, mark an obsolete data file as being replaced.

        Inputs:
            obsolete_id: The rawfile_id of the data file being replaced.
            replace_id: The rawfile_id of the replacement data file.
            comments: A comment describing the replacement.
            existdb: An (optional) existing database connection object.
                (Default: Establish a db connection)
        Outputs:
            None

        Raises:
            errors.InconsistentDatabaseError: more than one rawfile has
                the ID 'obsolete_id'.
            errors.BadInputError: no rawfile has the ID 'obsolete_id'.
            errors.RawfileSuperseded: the rawfile has already been replaced.
    """
    # Connect to the database
    db = existdb or database.Database()
    db.connect()

    # Check if obsolete_id exists in rawfiles. If not, fail.
    select = db.select([db.rawfiles.c.rawfile_id, \
                        db.replacement_rawfiles.c.replacement_rawfile_id.\
                                label("existing_replace_id")], \
                from_obj=[db.rawfiles. \
                    outerjoin(db.replacement_rawfiles, \
                        onclause=db.replacement_rawfiles.c.obsolete_rawfile_id == \
                                db.rawfiles.c.rawfile_id)]).\
                where(db.rawfiles.c.rawfile_id == obsolete_id)
    result = db.execute(select)
    rows = result.fetchall()
    if len(rows) > 1:
        raise errors.InconsistentDatabaseError("There are multiple (%d) " \
                    "rawfiles with ID=%d. Each ID should be unique!" % \
                    (len(rows), obsolete_id))
    elif len(rows) != 1:
        raise errors.BadInputError("The obsolete rawfile being replaced " \
                    "(ID:%d) does not exist!" % obsolete_id)
    row = rows[0] # There is only one row

    # Check if obsolete_id is already replaced. If so, list replacement and fail.
    if row['existing_replace_id'] is not None:
        raise errors.RawfileSuperseded("The rawfile (ID=%d) has already been " \
                                "replaced by ID=%d. Perhaps it is the " \
                                "latter file that should be replaced, or " \
                                "perhaps no additional replacement is " \
                                "required." % \
                                (obsolete_id, row['existing_replace_id']))

    # Log the replacement
    user_id = utils.get_userid()
    ins = db.replacement_rawfiles.insert()
    values = {'obsolete_rawfile_id':obsolete_id, \
              'replacement_rawfile_id':replace_id, \
              'user_id':user_id, \
              'comments':comments}
    result = db.execute(ins, values)
    result.close()

    # Check if obsolete_id is itself a replacement for other files
    # If so, mark all with newest replacement and
    # append comment (tag with date/time)
    select = db.select([db.replacement_rawfiles.c.obsolete_rawfile_id, \
                        db.replacement_rawfiles.c.comments]).\
                where(db.replacement_rawfiles.c.replacement_rawfile_id == \
                            obsolete_id)
    result = db.execute(select)
    rows = result.fetchall()
    result.close()
    user = utils.get_userinfo()
    for row in rows:
        newcomments = row['comments']+"\n%s (%d -> %d at %s): %s" % \
            (user, obsolete_id, replace_id, utils.Give_UTC_now(), comments)
        values = {'replacement_rawfile_id':replace_id, \
                  'comments':newcomments}
        # update() lives on the table, not on its column collection (.c).
        # The extra condition restricts the update to this row: without it
        # the first iteration rewrites every matching row with the first
        # row's comments and later iterations match nothing.
        update = db.replacement_rawfiles.update().\
                where(db.replacement_rawfiles.c.replacement_rawfile_id == \
                            obsolete_id).\
                where(db.replacement_rawfiles.c.obsolete_rawfile_id == \
                            row['obsolete_rawfile_id'])
        results = db.execute(update, values)
        results.close()