Ejemplo n.º 1
0
class PiazzaWrapper:
    """Thin wrapper around a piazza-api network handle with helpers for
    counting today's questions and summing unanswered follow-ups.

    NOTE(review): this snippet is corrupted — the login line below was
    mangled by a credential-redaction pass, and the ``def`` line of the
    feed-counting method (to which the dangling loop and final ``return``
    belong, apparently ``count_posts(self, posts, today)`` given the call
    in get_count_today) is missing. Restore from the original source
    before running.
    """

    def __init__(self, course_id='xxxxxxxxxxx'):
        self.p = Piazza()
        email_id = input("Enter your Piazza email ID : ")
        # NOTE(review): line below is garbled (redaction artifact) — it
        # fuses the getpass call with the head of another method's loop.
        password = getpass('Piazza Password:'******'feed']:
            # Only question posts are tallied (not notes/polls).
            if post['type'] == 'question':
                # print(post['nr'])
                # print(post['content_snipet'])
                time = post['log'][0]['t']
                # Timestamps look like "2020-01-01T12:00:00Z"; the trailing
                # "Z" is stripped before parsing.
                time = datetime.datetime.strptime(time[:-1],
                                                  "%Y-%m-%dT%H:%M:%S")
                if time.date() == today.date():
                    count += 1
                    # 'has_i' / 'has_s' mark instructor / student answers.
                    if 'has_i' in post.keys():
                        count_i += 1
                    elif 'has_s' in post.keys():
                        count_s += 1
                    else:
                        count_unanswered += 1
                        unanswered_posts.append(post['nr'])
                    # print(time)
        return count, count_i, count_s, count_unanswered, unanswered_posts

    def get_unanswered_followup(self):
        """Sum 'no_answer' counters across every post's content tree."""
        posts = self.comp_photo19.iter_all_posts()
        count = 0
        for post in posts:
            cid = post['nr']
            content = self.comp_photo19.get_post(cid)
            count += self.traverse_content_tree(content)
        return count

    def traverse_content_tree(self, content):
        """Recursively accumulate 'no_answer' values over a post's
        children (follow-ups and their nested replies)."""
        count = 0
        if 'children' in content.keys():
            if len(content['children']) > 0:
                for content_children in content['children']:
                    count += self.traverse_content_tree(content_children)
                    if 'no_answer' in content_children.keys():
                        count += content_children['no_answer']
        return count

    def get_count_today(self):
        """Count today's questions among the first 100 feed items.

        Returns (total, instructor-answered, student-answered,
        unanswered, unanswered post numbers).
        """
        posts = self.comp_photo19.get_feed(100, 0)
        count, count_i, count_s, count_unanswered, unanswered_posts = self.count_posts(
            posts, datetime.datetime.today())
        return count, count_i, count_s, count_unanswered, unanswered_posts
Ejemplo n.º 2
0
    def create_piazza_bot(user_email, user_password, course_code):
        """Build and return a PiazzaBot wired to the given course.

        Parameters:
        user_email: Piazza user email to authenticate with
        user_password: Piazza password to authenticate with
        course_code: Class/Course code on Piazza
        """
        client = Piazza()
        client.user_login(email=user_email, password=user_password)

        profile = client.get_user_profile()
        course_site = client.network(course_code)

        return PiazzaBot(client, profile, course_site)
Ejemplo n.º 3
0
class PiazzaBot():
    """Minimal Piazza client that prints the bodies of lecture-related posts."""

    def __init__(self):
        self.p = Piazza()
        self.p.user_login(USER_NAME, USER_PASS)
        # Remember the logged-in user's id for later use.
        self.uid = self.p.get_user_profile()['user_id']

        # Hard-coded SI 206 network id.
        self.si206 = self.p.network("jlp6m1ynp9713y")

    def do_bot(self):
        """Search the SI 206 feed for 'lecture' and print each post's content."""
        for result in self.si206.search_feed("lecture"):
            post_id = result[u'id']
            print(self.si206.get_post(post_id)[u'history'][0][u'content'])
            print("\n\n")
# Command-line options parsed upstream (argparse-style namespace).
active = args.status
groupname = args.groupname

# Body of the weekly discussion note; "#pin" asks Piazza to pin the post.
content = "What chemistry topics were new to you this past week? What was "\
        "confusing?\n\n Many students have the same confusions, so take a "\
        "look at what your peers have answered. See something you understand "\
        "that they don't? Start a discussion! See something you are also "\
        "confused about? Tell them why!\n\n Post your response in this thread"\
        " before Tuesday's lecture.\n\n #pin"

print('Connecting to Piazza via piazza-api...')

# Piazza setup
p = Piazza()
# NOTE(review): password=None looks like a redaction artifact or relies on
# an interactive prompt / cached session — confirm before running unattended.
p.user_login(email=email, password=None)
me = p.get_user_profile()
pclass = p.network(classid)
print('  Logged in as: %s' % me.get('name'))
print('')
for i in range(nsects):
    if (groupname != ''):
        thistitle = '{} {}{}'.format(title, groupname, i + 1)
        disc = groupname + ' ' + str(i + 1)
        params = {
            'status': active,
            'type': 'note',
            'folders': [disc],
            'subject': thistitle,
            'content': content,
            'config': {
Ejemplo n.º 5
0
class CaveBot():
    """Piazza recommendation bot.

    Indexes every post of a Piazza network into Elasticsearch, trains an
    LDA topic model over the course corpus, and replies to any post that
    contains the "!RecommendMe" trigger with its topic and similar posts.
    """

    def __init__(self, username, password, network_id, network_name):
        """Log in, index the course, train LDA, and answer pending requests.

        username, password -- Piazza credentials
        network_id         -- Piazza network (course) id
        network_name       -- course label; doubles as the ES index name
        """
        self.p = Piazza()
        self.p.user_login(username, password)

        self.user_profile = self.p.get_user_profile()
        self.network = self.p.network(network_id)
        self.network_name = network_name

        self.es = Elasticsearch()
        self.s = Search(using=self.es, index=self.network_name)
        self.jobs = []  # posts that asked for a recommendation
        self.es_index_name = network_name

        # nr <-> corpus-index mappings, loaded from the pickles below.
        self.index2nr = None
        self.nr2index = None
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = None, None, None, None

        self.topic_vectors = None

        bulk(self.es, self.index_all_with_recommendations())

        #  For debugging purposes and room for future play, reread every post, put into a .txt file and let lda use that.
        piazza_class2txt.main(self.network, "cs3169.txt")

        with open("index2nr.pkl", "rb") as fh:
            self.index2nr = pickle.load(fh)

        with open("nr2index.pkl", "rb") as fh:
            self.nr2index = pickle.load(fh)

        self.num_topics = 3
        self.train_iter = 50
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = refined_lda.main(
            "cs3169.txt",
            num_topics=self.num_topics,
            iterations=self.train_iter)

        self.recommend()

    @staticmethod
    def main():
        """CLI entry point: python3 cave_bot.py username password network_id network_name

        FIX: was defined inside the class without self/@staticmethod.
        Note the usage message does not stop execution; construction is
        still attempted (and fails on bad argv) — preserved behavior.
        """
        if len(sys.argv) != 5:
            print(
                "Usage: python3 cave_bot.py username password network_id network_name"
            )

        bot = CaveBot(*sys.argv[1:])

    # Recommends once per post, responds to parent answers only
    def index_all_with_recommendations(self):
        """Yield one Elasticsearch bulk "index" action per post.

        Side effect: any post whose latest subject/content (or whose
        children) contain "!RecommendMe" — and that the bot has not
        already answered (a child beginning with "!@#$") — is queued on
        self.jobs for recommend().
        """
        for post in self.network.iter_all_posts(limit=None):
            trigger = False
            latest_state = post["history"][0]
            if "!RecommendMe" in latest_state[
                    "subject"] or "!RecommendMe" in latest_state["content"]:
                trigger = True

            # Doesn't look for its children
            for child in post["children"]:
                # felt cleaner than child.get
                if "history" in child:
                    if "!RecommendMe" in child["history"][0]["content"]:
                        trigger = True
                    if "!@#$" in child["history"][0]["content"][:7]:
                        trigger = False
                        break
                else:
                    if "!RecommendMe" in child["subject"]:
                        trigger = True
                    if "!@#$" in child["subject"][:7]:
                        trigger = False
                        break

            if trigger:
                self.jobs.append(post)

            # Pull the instructor and student answers (if any) for indexing.
            i_answer = ""
            s_answer = ""
            for child in post["children"]:
                if child["type"] == "i_answer":
                    i_answer = child["history"][0]["content"]
                if child["type"] == "s_answer":
                    s_answer = child["history"][0]["content"]

            yield {
                "_op_type": "index",
                "_index": self.es_index_name,
                "_id": post["nr"],
                "_source": {
                    "subject": latest_state["subject"],
                    "content": latest_state["content"],
                    "i_answer": i_answer,
                    "s_answer": s_answer,
                    "responded": trigger
                }
            }

    def recommend(self,
                  debug=True):  # default to true to not accidentally post
        """Build (and, when debug is False, post) a recommendation
        follow-up for every queued post in self.jobs.

        The reply starts with "!@#$" so the bot recognises its own
        answers on later passes.
        """
        for post in self.jobs:
            response = "!@#$\n"
            topic = self.topic_of(self.nr2index[post["nr"]])
            response += "Topic of this post: " + str(topic) + "\n"

            response += "Contributive contents: " + str([
                "(" + pair[0] + ", " + str(pair[1])[:4] + ")"
                for pair in self.lda_model.show_topic(topic)
            ]) + "\n"

            response += "Posts with same topic: " + str(
                self.get_posts_with_same_topic(post["nr"], topic))

            if not debug:
                # FIX: referenced bare `network` (NameError); it lives on self.
                result = self.network.create_followup(
                    post["children"][1],
                    response)  # both nr and entire post works
            else:
                print("#### Start Post ####")
                print(response)
                print("#### End Post ####")
                print("train_iter, num_topics: ", self.train_iter,
                      self.num_topics)

    # deprecated and bad
    def get_posts_with_same_best_topic(self,
                                       post_number,
                                       target_topic,
                                       num_docs=3):
        """Scan outward (one nr up and one nr down per step) for posts
        whose best topic equals target_topic; give up after 100 steps.

        Returns at most num_docs "@nr" mention strings.
        """
        docs = []
        looked = 0
        # FIX: original condition `len(docs) < 3 or looked > 100` stayed
        # true forever once looked exceeded 100 (infinite loop).
        while len(docs) < num_docs and looked < 100:
            looked += 1
            nrs = [post_number + looked, post_number - looked]
            for nr in nrs:
                if nr in self.nr2index and nr != -1 and self.topic_of(
                        self.nr2index[nr]) == target_topic:
                    docs.append("@" + str(nr))

        return docs[:num_docs]

    # extra atrocious
    def get_posts_with_same_topic(self, number, target_topic, num_docs=3):
        """Return the num_docs posts whose topic vectors are nearest (by
        Euclidean distance) to the given post's vector, as "@nr" strings.

        num_docs + 1 slots are tracked because the post itself is its own
        nearest neighbour and is removed at the end.
        """
        self.get_topic_vectors()
        vector = self.topic_vectors[self.nr2index[number]]

        min_dists = [float("inf") for _ in range(num_docs + 1)]
        min_indices = [0 for _ in range(num_docs + 1)]
        for i in range(len(self.topic_vectors)):
            distance = np.sqrt(np.sum((vector - self.topic_vectors[i])**2, 0))
            if distance < max(min_dists):
                i_md = min_dists.index(max(min_dists))
                print(i, distance)  # debug trace
                min_dists[i_md] = distance
                min_indices[i_md] = i

        posts = ["@" + str(self.index2nr[i]) for i in min_indices]
        posts.remove("@" + str(number))  # no need to recommend the same post
        return posts

    def topic_of(self, document):
        """Return the position of the highest-weight topic for a corpus doc.

        document -- index into self.corpus.
        NOTE(review): assumes lda_model[...] yields (topic_id, weight)
        pairs; the returned position equals the topic id only when every
        topic is reported for the document.
        """
        topics = [
            topic for index, topic in self.lda_model[self.corpus[document]]
        ]
        return topics.index(max(topics))

    def get_topic_vectors(self):
        """Build a dense length-num_topics probability vector per document
        and store the list on self.topic_vectors.

        Documents for which the model omits low-probability topics are
        padded with zeros at the missing topic ids.
        """
        docs_topics = []

        for i in range(len(self.corpus)):
            doc_topics = self.lda_model.get_document_topics(self.corpus[i])
            if len(doc_topics) == self.num_topics:
                docs_topics.append(
                    np.array([component[1] for component in doc_topics]))
            else:
                topics = []
                d_pop = doc_topics.pop(-1)
                # Walk topic ids from high to low, filling gaps with 0.
                # FIX: the original dropped the final pair's weight when
                # doc_topics was already empty at its matching id.
                for t in range(self.num_topics - 1, -1, -1):
                    if t == d_pop[0]:
                        topics.append(d_pop[1])
                        if len(doc_topics) > 0:
                            d_pop = doc_topics.pop(-1)
                    else:
                        topics.append(0)
                topics.reverse()
                docs_topics.append(np.array(topics))

        self.topic_vectors = docs_topics

    def recommend_with_mlt(self, post, score_limit=0):
        """Return [(doc_id, score), ...] of indexed posts similar to *post*
        using an Elasticsearch more_like_this query; the post itself is
        excluded and only scores above score_limit are kept.
        """
        latest_state = post["history"][0]
        # Can do post likelyhood later
        search_text = latest_state["subject"] + latest_state["content"]

        # maybe static this later
        mlt_query = {
            "more_like_this": {
                "fields": ["subject", "content", "i_answer", "s_answer"],
                "like": search_text,
                "min_term_freq": 1,
                "min_doc_freq": 1,
                "max_query_terms": 50,
            }
        }

        recommendation = []
        # FIX: referenced bare `s` (NameError); the Search object is self.s.
        docs = self.s.query(mlt_query).execute()

        for doc in docs:
            if int(doc.meta["id"]) != int(
                    post["nr"]) and doc.meta["score"] > score_limit:
                recommendation.append((doc.meta["id"], doc.meta["score"]))

        return recommendation

    def change_network(self, network_id):
        """Point the bot at a different Piazza network.

        NOTE(review): the ES search and index name still use the original
        network_name — confirm whether the index should change as well.
        """
        self.network = self.p.network(network_id)
        self.s = Search(using=self.es, index=self.network_name)
        # FIX: `network_name` was an unbound name here (NameError).
        self.es_index_name = self.network_name
Ejemplo n.º 6
0
    def get_data(self):
        """Return all collected text fragments joined into one string."""
        fragments = self.fed
        return ''.join(fragments)


def strip_tags(html):
    """Strip all HTML tags from *html* and return the remaining text."""
    stripper = MLStripper()
    stripper.feed(html)
    return stripper.get_data()


# Log in to Piazza.
# NOTE(review): credentials are hard-coded/redacted — load them from a
# config file or prompt before real use.
p = Piazza()
p.user_login('*****@*****.**', 'thomaslau')

# Persist the user profile to disk, then read it back as JSON.
# FIX: use context managers so the file handles are closed even on error.
with open('userProfile.txt', 'w') as f:
    json.dump(p.get_user_profile(), f)

with open('userProfile.txt') as rawUserData:
    jsonUserData = json.load(rawUserData)

masterPath = os.getcwd()

# Walk every enrolled class; 'num' holds the course code (e.g. "CS 103").
for i in jsonUserData["all_classes"]:
    classConnection = p.network(i)
    posts = classConnection.iter_all_posts(limit=100000)

    className = jsonUserData["all_classes"][i]["num"]
    if className == "CS 103":
        x = 0
Ejemplo n.º 7
0
class PiazzaCompileBot(object):
    """Bot that watches Piazza posts for "CompileBot! <language>" requests,
    compiles the attached code via the ideone API, and posts the program's
    output back to the thread as a follow-up.

    NOTE(review): this is Python 2 code (print statements, urllib.unquote).
    """

    def __init__(self):
        # Log in and remember our own user id so posts we already replied
        # to can be recognised later (see already_compiled).
        self.p = Piazza()
        self.p.user_login(PIAZZA_USER, PIAZZA_PASS)
        self.uid = self.p.get_user_profile()['user_id']

        classes = self.p.get_user_classes()
        self.classes = []

        print 'Now watching posts for the following {0} classes:'.format(len(classes))
        for c in classes:
            print '{0} ({1}, {2})'.format(c['num'], c['name'], c['term'])
            self.classes.append(self.p.network(c['nid']))

            # NOTE(review): the filter is re-created on every loop pass;
            # a single UnreadFilter before the loop would likely suffice —
            # confirm UnreadFilter is stateless.
            self.filter = UnreadFilter()

    def check(self):
        """Scan each watched class's unread feed for CompileBot requests,
        compile them on ideone and reply with the results."""
        for c in self.classes:

            # ensure we go through the entire feed if there are more posts to read
            feed = {'more': True}
            while feed['more']:
                # filter for only updated posts
                feed = c.get_filtered_feed(self.filter)
                for feed_post in feed['feed']:
                    # get the post number and retrieve the post
                    post = c.get_post(feed_post['nr'])
                    if self.already_compiled(post):
                        print 'Post %s already compiled' % post['id']
                        # NOTE(review): 'break' abandons the rest of this
                        # feed page; 'continue' may have been intended.
                        break
                    post_text = post['history'][0]['content']

                    print 'Checking post %s for code' % post['id']

                    # parse the text in the post
                    # example text:
                    """
                    <p></p><p>I&#39;m having an issue, please help!</p>
                    <p>CompileBot! python</p>
                    <pre>def __init__(self):
                        print &#39;blah&#39;</pre>
                    <p>Input:</p>
                    <pre>blah</pre>
                    """
                    soup = BeautifulSoup(post_text.replace("<br />", "\n"))
                    # Look for p tags
                    tags = soup.find_all('p')
                    for tag in tags:
                        try:
                            # Match "CompileBot! <language> [-opts]" in the
                            # paragraph text (case-insensitive).
                            m = None if not tag.contents else re.search(r'(?i)CompileBot[.?;:!]*\s*(?P<args>.*)\s*', tag.contents[0])
                            if m is not None and tag.next_sibling and tag.next_sibling.next_sibling:
                                # look for code
                                code = None
                                cur_tag = tag.next_sibling.next_sibling
                                if cur_tag and cur_tag.name == 'pre':
                                    code = cur_tag.contents[0]

                                # look for optional stdin
                                if code is not None:
                                    stdin = ''
                                    if cur_tag.next_sibling and cur_tag.next_sibling.next_sibling:
                                        cur_tag = cur_tag.next_sibling.next_sibling
                                        try:
                                            # An "Input:" paragraph followed by a
                                            # <pre> block supplies stdin.
                                            if cur_tag.name == 'p' and bool(re.match('input', cur_tag.contents[0], re.I)) and cur_tag.next_sibling:
                                                cur_tag = cur_tag.next_sibling.next_sibling
                                                if cur_tag and cur_tag.name == 'pre':
                                                    stdin = cur_tag.contents[0]
                                                    cur_tag = cur_tag.next_sibling
                                        except Exception as e:
                                            pass
                                    else:
                                        pass
                                    code = urllib.unquote(code)
                                    stdin = urllib.unquote(stdin)
                                    try:
                                        lang, opts = m.group('args').split(' -', 1)
                                        opts = ('-' + opts).split()
                                    except ValueError:
                                        # No additional opts found
                                        lang, opts = m.group('args'), []
                                    lang = lang.strip()
                                    print 'Attempting compile for post {0}: language={1}, args={2}'.format(post['id'], lang, opts)
                                    try:
                                        details = self.compile(code, lang, stdin=stdin)
                                        print "Compiled ideone submission {link} for comment {id}".format(link=details['link'], id=post['id'])
                                        # The ideone submission result value indicates the final state of
                                        # the program. If the program compiled and ran successfully the
                                        # result is 15. Other codes indicate various errors.
                                        result_code = details['result']
                                        # The user is alerted of any errors via message reply unless they
                                        # include an option to include errors in the reply.
                                        if result_code in [11, 12, 15]:
                                            text = self.format_reply(details, opts)
                                            ideone_link = "http://ideone.com/{}".format(details['link'])
                                            text += "Ideone link: %s" % ideone_link
                                            print 'Compilation success!\n%s' % text
                                            c.add_followup(post['id'], text)
                                            print 'Posted results to Piazza'
                                        else:
                                            error_text = {
                                                11: COMPILE_ERROR_TEXT,
                                                12: RUNTIME_ERROR_TEXT,
                                                13: TIMEOUT_ERROR_TEXT,
                                                17: MEMORY_ERROR_TEXT,
                                                19: ILLEGAL_ERROR_TEXT,
                                                20: INTERNAL_ERROR_TEXT
                                            }.get(result_code, '')
                                            # Include any output from the submission in the reply.
                                            if details['cmpinfo']:
                                                error_text += "Compiler Output:\n{}\n".format(
                                                                    self.code_block(details['cmpinfo']))
                                            if details['output']:
                                                error_text += "Output:\n{}\n".format(
                                                        self.code_block(details['output']))
                                            if details['stderr']:
                                                error_text += "Error Output:\n{}\n".format(
                                                                    self.code_block(details['stderr']))
                                            # NOTE(review): error_text is only
                                            # printed, never posted — confirm.
                                            print 'Error: %s' % error_text
                                    except ideone.IdeoneError as e:
                                        c.add_followup(post['id'], 'An Ideone error occurred.\n%s' % self.code_block(e))
                                        print e
                        except ValueError as e:
                            import traceback, os.path, sys
                            top = traceback.extract_tb(sys.exc_info()[2])[-1]
                            print 'Parse failed: {0}'.format(', '.join([type(e).__name__, os.path.basename(top[0]), str(top[1])]))

    def already_compiled(self, post):
        """Return True if any child of *post* was authored by this bot."""
        children = post['children']
        for child in children:
            if child['uid'] == self.uid:
                return True
        return False

    def compile(self, source, lang, stdin=''):
        """Compile and evaluate source code using the ideone API and return
        a dict containing the output details.
        Keyword arguments:
        source -- a string containing source code to be compiled and evaluated
        lang -- the programming language pertaining to the source code
        stdin -- optional "standard input" for the program
        >>> d = compile('print("Hello World")', 'python')
        >>> d['output']
        Hello World
        """
        lang = LANG_ALIASES.get(lang.lower(), lang)
        # Login to ideone and create a submission
        i = ideone.Ideone(IDEONE_USER, IDEONE_PASS)
        sub = i.create_submission(source, language_name=lang, std_input=stdin)
        sub_link = sub['link']
        details = i.submission_details(sub_link)
        # The status of the submission indicates whether or not the source has
        # finished executing. A status of 0 indicates the submission is finished.
        while details['status'] != 0:
            details = i.submission_details(sub_link)
            time.sleep(3)
        details['link'] = sub_link
        return details

    def format_reply(self, details, opts):
        """Returns a reply that contains the output from a ideone submission's
        details along with optional additional information.
        """
        head, body, extra, = '', '', ''
        # Combine information that will go before the output.
        if '--source' in opts:
            head += 'Source:\n{}\n\n'.format(self.code_block(details['source']))
        if '--input' in opts:
            head += 'Input:\n{}\n\n'.format(self.code_block(details['input']))
        # Combine program output and runtime error output.
        output = details['output'] + details['stderr']
        # Truncate the output if it contains an excessive
        # amount of line breaks or if it is too long.
        if output.count('\n') > LINE_LIMIT:
            lines = output.split('\n')
            # If message contains an excessive amount of duplicate lines,
            # truncate to a small amount of lines to discourage spamming
            if len(set(lines)) < 5:
                lines_allowed = 2
            else:
                lines_allowed = 51
            output = '\n'.join(lines[:lines_allowed])
            output += "\n..."
        # Truncate the output if it is too long.
        if len(output) > 8000:
            output = output[:8000] + '\n    ...\n'
        body += 'Output:\n{}\n\n'.format(self.code_block(output))
        if details['cmpinfo']:
            body += 'Compiler Info:\n{}\n\n'.format(details['cmpinfo'])
        # Combine extra runtime information.
        if '--date' in opts:
            extra += "Date: {}\n\n".format(details['date'])
        if '--memory' in opts:
            extra += "Memory Usage: {} bytes\n\n".format(details['memory'])
        if '--time' in opts:
            extra += "Execution Time: {} seconds\n\n".format(details['time'])
        if '--version' in opts:
            extra += "Version: {}\n\n".format(details['langVersion'])
        # To ensure the reply is less than 10000 characters long, shorten
        # sections of the reply until they are of adequate length. Certain
        # sections with less priority will be shortened before others.
        # NOTE(review): reassigning the loop variable `section` does not
        # modify head/body/extra, so this truncation pass currently has no
        # effect on reply_text — confirm intent.
        total_len = 0
        for section in (FOOTER, body, head, extra):
            if len(section) + total_len > 9800:
                section = section[:9800 - total_len] + '\n...\n'
                total_len += len(section)
        reply_text = head + body + extra
        return reply_text

    def code_block(self, output):
        """Wrap *output* in a <pre> block for Piazza rendering."""
        return "<pre>{0}</pre>".format(output)
Ejemplo n.º 8
0
        self.fed = []
    def handle_data(self, d):
        """Accumulate one chunk of character data emitted by the parser."""
        # Pieces are stored in a list; get_data() joins them later.
        self.fed.append(d)
    def get_data(self):
        """Return everything fed to this stripper as a single string."""
        pieces = self.fed
        return ''.join(pieces)

def strip_tags(html):
    """Remove HTML tags from *html*, returning only the text content."""
    stripper = MLStripper()
    stripper.feed(html)
    return stripper.get_data()

# Log in to Piazza.
# NOTE(review): credentials are hard-coded/redacted — load them from a
# config file or prompt before real use.
p = Piazza()
p.user_login('*****@*****.**', 'thomaslau')

# Persist the user profile to disk, then read it back as JSON.
# FIX: use context managers so the file handles are closed even on error.
with open('userProfile.txt', 'w') as f:
    json.dump(p.get_user_profile(), f)

with open('userProfile.txt') as rawUserData:
    jsonUserData = json.load(rawUserData)


masterPath = os.getcwd()

for i in jsonUserData["all_classes"]:
    classConnection = p.network(i)
    posts = classConnection.iter_all_posts(limit=100000)

    className = jsonUserData["all_classes"][i]["num"]
    if className == "CS 103":
Ejemplo n.º 9
0
class PiazzaBot(object):
    """Piazza assistant bot.

    Mirrors the posts of one Piazza class into MongoDB, flags new posts
    with a bot follow-up while they are being processed, and uses BERT
    semantic search to suggest potential duplicate posts.
    """

    # Piazza uid of the bot's own account; posts it authored are skipped.
    BOT_UID = "gd6v7134AUa"

    def __init__(self,
                 user,
                 password,
                 class_id,
                 corpus=None,
                 corpus_embeddings=None,
                 default_bert=True):
        """Log in to Piazza and wire up the DB and BERT helpers.

        :param user: Piazza login email
        :param password: Piazza login password
        :param class_id: Piazza network id of the class to monitor
        :param corpus: optional pre-built corpus for semantic search
        :param corpus_embeddings: optional pre-computed corpus embeddings
        :param default_bert: load the default BERT model when True
        """
        self.p = Piazza()
        self.p.user_login(user, password)
        self.class_id = class_id
        self.user_profile = self.p.get_user_profile()
        self.network = self.p.network(class_id)
        self.DB_manger = MongoDBManger()
        self.bert = BertSemanticSearch(corpus, corpus_embeddings, default_bert)
        # cid of each stored post, parallel to the BERT corpus ordering.
        self.parallel_cid_list = []

    def heart_beat(self):
        """Process all posts once: sync each into the db, mark unprocessed
        posts with a bot follow-up, and attach duplicate suggestions.

        :return: NA
        """
        posts = self.network.iter_all_posts()
        for post in posts:
            try:
                cid = post["id"]
                query = {"cid": cid}
                result = self.DB_manger.find(query)
                db_dict = self.create_db_dict(post, result)

                # TODO: remove HTML tags

                if result is None and db_dict is not None:
                    # First time we see this post: store it and, unless the
                    # bot already marked it, flag it and make it private.
                    self.DB_manger.insert(db_dict)
                    if not db_dict["is_marked"]:
                        self.create_piazza_bot_follow_up(
                            cid, "Piazza Bot is trying to process this post")
                        self.make_private(db_dict)

                elif db_dict is not None:
                    if not db_dict["is_processed"] and db_dict[
                            "is_marked"] and len(self.parallel_cid_list) != 0:
                        self.make_piazza_suggestions(db_dict, cid)
                    elif not db_dict["is_marked"]:
                        self.create_piazza_bot_follow_up(
                            cid, "Piazza Bot is trying to process this post")
                        self.make_private(db_dict)

                    # One upsert is enough: the original wrote the same
                    # document a second time when change_log/revision grew,
                    # which was redundant.
                    self.DB_manger.insert_update(query, db_dict)

            except KeyError:
                print("no cid")

    def generate_embeddings(self):
        """Rebuild the BERT corpus and embeddings from every db document.

        :return: 1 when the db is empty, otherwise NA
        """
        docs = self.DB_manger.get_all()
        if docs is None:
            return 1

        corpus = []
        corpus_embeddings = []
        parallel_cid_list_local = []
        for doc in docs:
            corpus.append(doc["content"])
            corpus_embeddings.append(pickle.loads(doc["encoding"]))
            parallel_cid_list_local.append(doc["cid"])

        # Stack the individually pickled 1-D tensors into one 2-D tensor.
        corpus_embeddings = [
            torch.unsqueeze(t, dim=0) for t in corpus_embeddings
        ]
        corpus_embeddings = torch.cat(corpus_embeddings, dim=0)

        self.bert.set_corpus(corpus)
        self.bert.set_corpus_embeddings(corpus_embeddings)
        self.parallel_cid_list = parallel_cid_list_local

    def create_db_dict(self, post, old_post, tensor=True):
        """Convert raw Piazza post json into the dict stored in the db.

        :param post: raw post json to convert
        :param old_post: previous db document for this post, or None
        :param tensor: when True, (re)compute the BERT embedding if needed
        :return: db-ready dict, or None for bot-authored/malformed posts
        """
        try:
            cid = post["id"]
            history = post["history"]
            change_log_len = len(post["change_log"])
            revision = len(history)
            # The last history entry is the current content of the post.
            cur_post_content = history[-1]
            uid = self.find_uid(cur_post_content)
            if self.BOT_UID == uid:
                return None

            post_type = post["type"]
            post_folders = post['folders']
            post_subject = cur_post_content['subject']
            post_content = cur_post_content['content']
            is_marked_by_pb, is_processed, mark_id = self.is_marked_by_piazza_bot(
                post["children"], old_post)

            new_value = {
                "cid": cid,
                "revision": revision,
                "change_log_len": change_log_len,
                "uid": uid,
                "type": post_type,
                "folders": post_folders,
                "subject": post_subject,
                "content": post_content,
                "is_marked": is_marked_by_pb,
                "mark_id": mark_id,
                "is_processed": is_processed
            }
            # Embed only on first sight or after a content revision.
            if tensor and (old_post is None
                           or revision > old_post["revision"]):
                new_value["encoding"] = pickle.dumps(
                    self.bert.encode_content(post_content))
            return new_value

        except KeyError as e:
            print(e)
            return None

    def is_marked_by_piazza_bot(self, children, old_post):
        """Determine whether the bot already marked/processed this post.

        :param children: follow-up posts (children) of the current post
        :param old_post: previous db document for this post, or None
        :return: (is_marked, is_processed, cid of the marking follow-up
            or the string "None")
        """
        if len(children) == 0:
            return False, False, "None"

        for follow_up in children:
            # An instructor answer means the bot is done with this post.
            if follow_up['type'] == "i_answer":
                return True, True, "None"

            subject = follow_up['subject']
            if subject == "Piazza Bot is trying to process this post":
                return True, False, follow_up['id']
            elif subject == "Piazza Bot Has Processed this post":
                return True, True, follow_up['id']
            elif len(subject
                     ) > 24 and subject[:24] == '<p><b>Piazza Bot</b></p>':
                # Suggestion follow-ups start with the bot banner.
                return True, False, follow_up['id']

        if old_post is not None and old_post["is_marked"]:
            return True, True, old_post["mark_id"]

        return False, False, "None"

    def make_private(self, db_dict):
        """Make the post associated with the given db dict private.

        :param db_dict: db dict of the post to hide
        :return: 1 if successful else 0
        """
        try:
            if self.BOT_UID != db_dict["uid"]:
                self.update_post(db_dict["cid"], db_dict["type"],
                                 db_dict["revision"], db_dict["folders"],
                                 db_dict["subject"], db_dict["content"], False)

            return 1
        except KeyError:
            return 0

    def make_suggestion_string(self, cur_cid, post_cid):
        """Build the HTML snippet linking one duplicate candidate.

        :param cur_cid: cid of the candidate duplicate post
        :param post_cid: cid of the post the suggestion is attached to
        :return: HTML string with view / mark-duplicate / mark-follow-up links
        """
        link = '<p><a href="https://piazza.com/class/kg9odngyfny6s9?cid={}" target="_blank" rel="noopener">Potential Duplicate of @{}</a></p>'.format(
            cur_cid, cur_cid)
        mark_dup = '<p><a href="http://127.0.0.1:5000/api/dup/{}/{}" target="_blank" rel="noopener">Mark Current Post as Duplicate of @{}</a>'.format(
            post_cid, cur_cid, cur_cid)
        mark_followup = 'or <a href="http://127.0.0.1:5000/api/followup/{}/{}" target="_blank" rel="noopener">Mark Current Post as Follow up of @{}</a></p>'.format(
            post_cid, cur_cid, cur_cid)
        return link + mark_dup + mark_followup

    def make_piazza_suggestions(self, db_dict, cid):
        """Attach the top BERT duplicate suggestions to a post's follow-up.

        :param db_dict: db dict of the post to make suggestions for
        :param cid: cid of that post
        :return: 1 if successful else 0
        """
        #TODO add getting suggestions code
        msg = '<p><b>Piazza Bot</b></p><p><a href="http://127.0.0.1:5000/api/post/{}" target="_blank" rel="noopener">Make Post Public</a></p>'.format(
            cid)

        try:
            if self.BOT_UID != db_dict["uid"]:
                # Top-3 nearest posts in embedding space, mapped back to cids.
                topk_idxs = self.bert.single_semantic_search(
                    db_dict["content"], top_k=3)
                topk_cids = [self.parallel_cid_list[idx] for idx in topk_idxs]

                for dup_cid in topk_cids:
                    if dup_cid != cid:
                        msg += self.make_suggestion_string(dup_cid, cid)

                self.update_follow_up(db_dict["mark_id"], msg)

            return 1
        except KeyError:
            return 0

    def find_uid(self, cur_post_content):
        """Find the uid from the latest post history entry (content).

        :param cur_post_content: content params of the post being processed
        :return: uid of the user who made the last edit, or "" when absent
        """
        try:
            uid = cur_post_content["uid"]
        except KeyError:
            uid = ""
        return uid

    def create_post(self,
                    post_folders,
                    post_subject,
                    post_content,
                    post_type="question",
                    is_announcement=0,
                    bypass_email=0,
                    anonymous=False):
        """
        For simulating asking a question on Piazza. See the Piazza package for full documentation.

        NOTE: post_folders is actually a list of string, not a single string
        """
        info = self.network.create_post(post_type=post_type,
                                        post_folders=post_folders,
                                        post_subject=post_subject,
                                        post_content=post_content,
                                        is_announcement=is_announcement,
                                        bypass_email=bypass_email,
                                        anonymous=anonymous)
        return info

    def update_post(self,
                    cid,
                    post_type,
                    revision,
                    post_folders,
                    post_subject,
                    post_content,
                    visibility_all=True):
        """Update a post.

        :param cid: cid of the post we want to update
        :param post_type: type to change the post to: "note", "question" or "poll"
        :param revision: revision number the update applies to
        :param post_folders: folders the post belongs to (list of strings)
        :param post_subject: new subject
        :param post_content: new content
        :param visibility_all: True for visible-to-all, False for
            instructors + original poster only
        :return: whether the post update was successful
        """
        params = {
            "cid": cid,
            "subject": post_subject,
            "content": post_content,
            "folders": post_folders,
            "type": post_type,
            "revision": revision,
            "visibility": "all" if visibility_all else "private"
        }
        return self.network._rpc.content_update(params)

    def create_piazza_bot_follow_up(self, cid, content, ionly=False):
        """Create a follow-up on a post.

        :param cid: cid of the post we want to add this follow up to
        :param content: content of the follow up post
        :param ionly: make the follow-up visible to instructors only
        :return: whether the follow up was created
        """
        params = {
            "cid": cid,
            "type": "followup",
            "subject": content,
            "content": "",
        }
        if ionly:
            # BUG FIX: the original line ended with a comma, so params["config"]
            # became the 1-tuple ({"ionly": True},) instead of the dict,
            # silently breaking instructor-only visibility.
            params["config"] = {"ionly": True}
        return self.network._rpc.content_create(params)

    def update_follow_up(self, followup_post, content):
        """Update a follow-up on a post.

        :param followup_post: follow-up to update
            (NOTE(review): callers here pass the follow-up's cid/mark_id;
            confirm what network.update_post actually expects)
        :param content: new content of the follow up post
        :return: whether the follow up post was successfully updated
        """
        self.network.update_post(followup_post, content)

    def get_post(self, cid):
        """Retrieve data for a certain post.

        :param cid: cid of the post you want to retrieve data for
        :return: raw post json from Piazza
        """
        return self.network.get_post(cid)

    def get_post_from_db(self, cid):
        """Retrieve data from the db for a certain post.

        :param cid: cid of the post you want to retrieve data for
        :return: Mongo result object
        """
        query = {"cid": cid}
        return self.DB_manger.find(query)

    def mark_as_duplicate(self,
                          duplicated_cid,
                          master_cid,
                          msg='Piazza bot found this Duplicate'):
        """Mark the given post as a duplicate of another.

        :param duplicated_cid: cid of the post to mark as duplicate
        :param master_cid: cid of the post to file the duplicate under
        :param msg: reason shown for the duplicate marking
        :return: whether the duplicate mark request was successful
        """
        self.network.mark_as_duplicate(duplicated_cid, master_cid, msg)

    def delete_post(self, cid):
        """Delete a post from Piazza.

        :param cid: cid of the post to delete
        :return: whether the delete request was successful
        """
        self.network.delete_post(cid)

    def delete_post_db(self, cid):
        """Delete a post from the db.

        :param cid: cid of the post to delete
        :return: Mongo result object
        """
        return self.DB_manger.del_by_cid(cid)

    def get_piazza_suggestions(self, query):
        """Ask Piazza's own "find similar" endpoint for related posts.

        :param query: free-text query (normally a post's content)
        :return: parsed rpc response with a "list" of similar posts
        """
        params = {"nid": self.class_id, "query": query}
        r = self.network._rpc.request(method="network.find_similar",
                                      data=params)
        return self.network._rpc._handle_error(
            r, "Could not get suggestions {}.".format(repr(params)))

    def get_full_piazza(self):
        """Dump every post plus its top-3 Piazza "find similar" matches to csv.

        :return: NA (writes a csv to a hard-coded path)
        """
        posts = self.network.iter_all_posts()
        # BUG FIX: the columns must match the keys used in new_row below;
        # the original declared "match 1" etc. while the rows used
        # "match 1 cid", so the declared columns were never filled.
        dataframe_cols = [
            "cid", "content", "match 1 cid", "match 2 cid", "match 3 cid"
        ]
        dataframe = pd.DataFrame(columns=dataframe_cols)
        for post in posts:
            db_dict = self.create_db_dict(post, None, False)
            if db_dict is None:
                continue
            content = db_dict["content"]
            result = self.get_piazza_suggestions(content)
            cid = db_dict["cid"]
            try:
                suggestions = result["list"]
            except KeyError:
                continue

            counter = 0
            new_row = {
                "cid": cid,
                "content": content,
                "match 1 cid": "None",
                "match 2 cid": "None",
                "match 3 cid": "None"
            }
            # Keep the first three suggestions that are not the post itself.
            for suggestion in suggestions:
                if suggestion['id'] != cid:
                    if counter == 0:
                        new_row["match 1 cid"] = suggestion['id']
                    elif counter == 1:
                        new_row["match 2 cid"] = suggestion['id']
                    elif counter == 2:
                        new_row["match 3 cid"] = suggestion['id']
                        break
                    counter += 1

            # DataFrame.append was removed in pandas 2.x; concat instead.
            dataframe = pd.concat([dataframe, pd.DataFrame([new_row])],
                                  ignore_index=True)

        # NOTE(review): hard-coded absolute output path.
        dataframe.to_csv(
            r"C:\Users\sohai\Documents\Uni 2020\csc392\piazzabot\data\paizza_api_matchs.csv"
        )
Ejemplo n.º 10
0
def results(request, information_id):
    """Django view: keyword sentiment analysis over recent Piazza posts.

    Logs in to Piazza with the credentials stored on the Information row,
    walks backwards through up to 300 posts starting at
    ``information.last_CID``, scores each post with the VADER lexicon,
    aggregates per-keyword sentiment, and renders the result table.

    :param request: Django HttpRequest
    :param information_id: pk of the Information row holding credentials,
        course id, keywords and the starting cid
    :return: rendered 'piazzapolls/results.html' response
    """
    information = get_object_or_404(Information, pk=information_id)

    p = Piazza()
    email = information.email
    password = information.password
    # SECURITY FIX: the original printed both the email and the plain-text
    # password to stdout; never log credentials.

    p.user_login(email=email, password=password)
    user_profile = p.get_user_profile()
    course = p.network(information.course_ID)

    # Keywords are stored as one comma-separated string on the model.
    search_words = [x.strip() for x in information.keywords.split(',')]

    # Walk backwards through at most 300 cids starting at last_CID.
    start_post = information.last_CID
    end_post = max(0, start_post - 300)

    # Counts posts actually fetched; used as the proportion denominator.
    # NOTE(review): starts at 50 rather than 0, which skews
    # proportion_posts -- preserved as-is, confirm intent.
    n = 50

    # {cid (str): message content + ' ' + subject content}
    cids_to_content = {}

    # Per-keyword aggregates:
    # {word: {total_sentiment, average_sentiment,
    #         number_posts, proportion_posts}}
    result = {}
    for word in search_words:
        result[word] = {
            'total_sentiment': 0.0,
            'average_sentiment': 0.0,
            'number_posts': 0,
            'proportion_posts': 0.0
        }

    for current_post in range(start_post, end_post, -1):
        try:
            post = course.get_post(current_post)
            history = post['history']
            subject = history[0]['subject']
            content = history[0]['content']
            # Strip HTML tags and escaped quotes from the raw post body.
            entry = re.sub("<.*?>", "", content + ' ' + subject)
            entry = re.sub("&#34;", "", entry)
            cids_to_content[str(current_post)] = entry
            n += 1
        except Exception:
            # Deleted/private cids raise; skip them. (The original used a
            # bare except, which also swallowed KeyboardInterrupt.)
            continue

    all_messages_df = pd.DataFrame({
        'cids': list(cids_to_content.keys()),
        'content': list(cids_to_content.values())
    })
    # NOTE(review): the original called set_index('cids') and discarded the
    # result (a no-op); actually applying it would drop the 'cids' column
    # that the tidy-format loop below reads, so the call was removed.

    # Load the VADER lexicon as word -> polarity score.
    # NOTE(review): absolute local path -- breaks on any other machine.
    vader_path = "/Users/EdmundTian/Desktop/Projects/piazzaSentimentAnalysis/SodaHacks/mysite/vader_lexicon.txt"

    lex_polarities = pd.read_table(vader_path,
                                   header=None,
                                   delim_whitespace=True,
                                   usecols=[0, 1])
    lex_polarities.set_index(0, inplace=True)
    lex_polarities.columns = ['polarity']
    lex_polarities['polarity'] = pd.to_numeric(lex_polarities['polarity'],
                                               errors='coerce')

    # Clean the text before scoring: lowercase (the lexicon is lowercase)
    # and collapse punctuation to single spaces.
    all_messages_df['lower_content'] = [
        text.lower() for text in all_messages_df['content']
    ]
    punct_re = r'[^ \t\n\r\f\va-zA-Z0-9_]'
    all_messages_df['no_punc'] = [
        re.sub(punct_re, " ", text)
        for text in all_messages_df['lower_content']
    ]

    # Tidy format: one row per word occurrence, indexed by post cid.
    tidy_format = []
    for text, cid in zip(all_messages_df['no_punc'], all_messages_df['cids']):
        for i, word in enumerate(text.split()):
            tidy_format.append({'index': cid, 'num': i, 'word': word})

    tidy_format = pd.DataFrame.from_dict(tidy_format)
    tidy_format.set_index('index', inplace=True)

    # Join each word against the lexicon, then sum polarities per post.
    merged = tidy_format.merge(lex_polarities,
                               how='left',
                               left_on='word',
                               right_index=True)
    merged.sort_index(inplace=True)
    # BUG FIX: fillna is not in-place -- the original discarded the result,
    # leaving NaN for out-of-lexicon words (groupby.sum happened to skip
    # them, so the intent is now explicit).
    merged = merged.fillna(0.0)
    grouped = merged.groupby('index')['polarity'].sum()

    # NOTE(review): positional assignment assumes grouped (sorted by cid
    # string) lines up with the dataframe's insertion order -- confirm.
    all_messages_df['polarity'] = grouped.values

    # Accumulate sentiment and post counts for every keyword occurrence.
    for index, row in all_messages_df.iterrows():
        for search_word in search_words:
            text = row['no_punc']
            polarity = row['polarity']
            if search_word in text:
                result[search_word]['total_sentiment'] += polarity
                result[search_word]['number_posts'] += 1

    for key in result.keys():
        if result[key]['number_posts'] != 0:
            result[key]['average_sentiment'] = float(
                result[key]['total_sentiment']) / float(
                    result[key]['number_posts'])
        if n != 0:
            result[key]['proportion_posts'] = float(
                result[key]['number_posts']) / float(n)

    result = pd.DataFrame.from_dict(result, orient='index')
    result = result.drop('total_sentiment', axis=1)
    result = result.sort_values('average_sentiment')

    result = result.to_html()

    return render(request, 'piazzapolls/results.html', {
        'information': information,
        'result': result
    })