from getpass import getpass
import datetime

from piazza_api import Piazza


class PiazzaWrapper:
    def __init__(self, course_id='xxxxxxxxxxx'):
        self.p = Piazza()
        email_id = input("Enter your Piazza email ID : ")
        password = getpass('Piazza Password:')
        # The login/network-setup lines were redacted in the original snippet;
        # reconstructed here from how self.comp_photo19 is used below.
        self.p.user_login(email_id, password)
        self.comp_photo19 = self.p.network(course_id)

    def count_posts(self, posts, today):
        count = 0
        count_i = 0
        count_s = 0
        count_unanswered = 0
        unanswered_posts = []
        for post in posts['feed']:
            if post['type'] == 'question':
                time = post['log'][0]['t']
                time = datetime.datetime.strptime(time[:-1], "%Y-%m-%dT%H:%M:%S")
                if time.date() == today.date():
                    count += 1
                    if 'has_i' in post.keys():
                        count_i += 1
                    elif 'has_s' in post.keys():
                        count_s += 1
                    else:
                        count_unanswered += 1
                        unanswered_posts.append(post['nr'])
        return count, count_i, count_s, count_unanswered, unanswered_posts

    def get_unanswered_followup(self):
        posts = self.comp_photo19.iter_all_posts()
        count = 0
        for post in posts:
            cid = post['nr']
            content = self.comp_photo19.get_post(cid)
            count += self.traverse_content_tree(content)
        return count

    def traverse_content_tree(self, content):
        count = 0
        if 'children' in content.keys():
            if len(content['children']) > 0:
                for content_children in content['children']:
                    count += self.traverse_content_tree(content_children)
                    if 'no_answer' in content_children.keys():
                        count += content_children['no_answer']
        return count

    def get_count_today(self):
        posts = self.comp_photo19.get_feed(100, 0)
        count, count_i, count_s, count_unanswered, unanswered_posts = self.count_posts(
            posts, datetime.datetime.today())
        return count, count_i, count_s, count_unanswered, unanswered_posts
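# Usage sketch for the wrapper above; 'abc123xyz' is a placeholder network ID,
# not a real course.
if __name__ == "__main__":
    wrapper = PiazzaWrapper(course_id='abc123xyz')
    count, count_i, count_s, count_unanswered, unanswered = wrapper.get_count_today()
    print("Questions today: {} ({} instructor-answered, {} student-answered, "
          "{} unanswered)".format(count, count_i, count_s, count_unanswered))
    print("Unanswered post numbers:", unanswered)
    print("Unresolved follow-ups across the course:", wrapper.get_unanswered_followup())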
def create_piazza_bot(user_email, user_password, course_code):
    '''
    Method to instantiate a Piazza Bot

    Parameters:
        user_email: Piazza user email to authenticate with
        user_password: Piazza password to authenticate with
        course_code: Class/Course code on Piazza
    '''
    piazza = Piazza()
    piazza.user_login(email=user_email, password=user_password)
    user_profile = piazza.get_user_profile()
    course_site = piazza.network(course_code)
    return PiazzaBot(piazza, user_profile, course_site)
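# Example call, a minimal sketch: credentials are prompted for rather than
# hard-coded, and the course code is a placeholder.
if __name__ == "__main__":
    from getpass import getpass
    bot = create_piazza_bot(
        user_email=input("Piazza email: "),
        user_password=getpass("Piazza password: "),
        course_code="abc123xyz",  # placeholder network ID
    )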
class PiazzaBot():
    def __init__(self):
        self.p = Piazza()
        self.p.user_login(USER_NAME, USER_PASS)
        self.uid = self.p.get_user_profile()['user_id']
        # classes = self.p.get_user_classes()
        self.si206 = self.p.network("jlp6m1ynp9713y")

    def do_bot(self):
        posts = self.si206.search_feed("lecture")
        for post in posts:
            post_id = post[u'id']
            print(self.si206.get_post(post_id)[u'history'][0][u'content'])
            print("\n\n")
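# A minimal driver, assuming USER_NAME and USER_PASS come from the environment
# rather than being hard-coded (the environment variable names below are an
# assumption; the original defines these constants elsewhere):
import os

USER_NAME = os.environ.get("PIAZZA_USER")
USER_PASS = os.environ.get("PIAZZA_PASS")

if __name__ == "__main__":
    bot = PiazzaBot()
    bot.do_bot()  # prints the original content of every post matching "lecture"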
active = args.status
groupname = args.groupname

content = "What chemistry topics were new to you this past week? What was "\
          "confusing?\n\n Many students have the same confusions, so take a "\
          "look at what your peers have answered. See something you understand "\
          "that they don't? Start a discussion! See something you are also "\
          "confused about? Tell them why!\n\n Post your response in this thread"\
          " before Tuesday's lecture.\n\n #pin"

print('Connecting to Piazza via piazza-api...')

# Piazza setup
p = Piazza()
p.user_login(email=email, password=None)
me = p.get_user_profile()
pclass = p.network(classid)
print(' Logged in as: %s' % me.get('name'))
print('')

for i in range(nsects):
    if (groupname != ''):
        thistitle = '{} {}{}'.format(title, groupname, i + 1)
        disc = groupname + ' ' + str(i + 1)
    params = {
        'status': active,
        'type': 'note',
        'folders': [disc],
        'subject': thistitle,
        'content': content,
        'config': {
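# The raw `params` dict above is cut off in the original snippet. As a hedged
# alternative, piazza-api's public create_post helper (used by other bots in
# this collection) can make the same note without assembling raw RPC params,
# at the cost of the 'config'/'status' fields being built above:
#
#     for i in range(nsects):
#         thistitle = '{} {}{}'.format(title, groupname, i + 1)
#         disc = groupname + ' ' + str(i + 1)
#         pclass.create_post(post_type='note',
#                            post_folders=[disc],
#                            post_subject=thistitle,
#                            post_content=content)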
import pickle
import sys

import numpy as np
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from elasticsearch_dsl import Search
from piazza_api import Piazza

# project-local modules
import piazza_class2txt
import refined_lda


class CaveBot():
    def __init__(self, username, password, network_id, network_name):
        self.p = Piazza()
        self.p.user_login(username, password)
        self.user_profile = self.p.get_user_profile()
        self.network = self.p.network(network_id)
        self.network_name = network_name
        self.es = Elasticsearch()
        self.s = Search(using=self.es, index=self.network_name)
        self.jobs = []
        self.es_index_name = network_name
        self.index2nr = None
        self.nr2index = None
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = None, None, None, None
        self.topic_vectors = None
        bulk(self.es, self.index_all_with_recommendations())
        # For debugging purposes and room for future play: reread every post,
        # put it into a .txt file, and let LDA use that.
        piazza_class2txt.main(self.network, "cs3169.txt")
        with open("index2nr.pkl", "rb") as file:
            self.index2nr = pickle.load(file)
        with open("nr2index.pkl", "rb") as file:
            self.nr2index = pickle.load(file)
        self.num_topics = 3
        self.train_iter = 50
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = refined_lda.main(
            "cs3169.txt", num_topics=self.num_topics, iterations=self.train_iter)
        self.recommend()

    # Recommends once per post, responds to parent answers only
    def index_all_with_recommendations(self):
        for post in self.network.iter_all_posts(limit=None):
            trigger = False
            latest_state = post["history"][0]
            if ("!RecommendMe" in latest_state["subject"]
                    or "!RecommendMe" in latest_state["content"]):
                trigger = True
            # Doesn't look at its children
            for child in post["children"]:
                # felt cleaner than child.get
                if "history" in child:
                    if "!RecommendMe" in child["history"][0]["content"]:
                        trigger = True
                    if "!@#$" in child["history"][0]["content"][:7]:
                        trigger = False
                        break
                else:
                    if "!RecommendMe" in child["subject"]:
                        trigger = True
                    if "!@#$" in child["subject"][:7]:
                        trigger = False
                        break
            if trigger:
                self.jobs.append(post)
            i_answer = ""
            s_answer = ""
            for child in post["children"]:
                if child["type"] == "i_answer":
                    i_answer = child["history"][0]["content"]
                if child["type"] == "s_answer":
                    s_answer = child["history"][0]["content"]
            yield {
                "_op_type": "index",
                "_index": self.es_index_name,
                "_id": post["nr"],
                "_source": {
                    "subject": latest_state["subject"],
                    "content": latest_state["content"],
                    "i_answer": i_answer,
                    "s_answer": s_answer,
                    "responded": trigger
                }
            }

    def recommend(self, debug=True):  # default to True to not accidentally post
        for post in self.jobs:
            response = "!@#$\n"
            # response = str(self.recommend_with_mlt(post)) + "\n"
            topic = self.topic_of(self.nr2index[post["nr"]])
            response += "Topic of this post: " + str(topic) + "\n"
            response += "Contributive contents: " + str([
                "(" + pair[0] + ", " + str(pair[1])[:4] + ")"
                for pair in self.lda_model.show_topic(topic)
            ]) + "\n"
            # may be useful for debugging:
            # response += "Post number: " + str(post["nr"]) + "\n"
            # response += "Post content: " + post["history"][0]["content"] + "\n"
            response += "Posts with same topic: " + str(
                self.get_posts_with_same_topic(post["nr"], topic))
            if not debug:
                # both nr and the entire post work here
                result = self.network.create_followup(post["children"][1], response)
            else:
                print("#### Start Post ####")
                print(response)
                print("#### End Post ####")
        print("train_iter, num_topics: ", self.train_iter, self.num_topics)

    # deprecated and bad
    def get_posts_with_same_best_topic(self, post_number, target_topic, num_docs=3):
        # do one up one down
        docs = []
        looked = 0
        # original condition (`or looked > 100`) could never terminate once
        # `looked` passed 100; `and looked < 100` is the evident intent
        while len(docs) < 3 and looked < 100:
            looked += 1
            nrs = [post_number + looked, post_number - looked]
            for nr in nrs:
                if (nr in self.nr2index and nr != -1
                        and self.topic_of(self.nr2index[nr]) == target_topic):
                    docs.append("@" + str(nr))
        return docs[:num_docs]

    # extra atrocious
    def get_posts_with_same_topic(self, number, target_topic, num_docs=3):
        self.get_topic_vectors()
        vector = self.topic_vectors[self.nr2index[number]]
        min_dists = [float("inf") for i in range(num_docs + 1)]
        min_indices = [0 for i in range(num_docs + 1)]
        for i in range(len(self.topic_vectors)):
            distance = np.sqrt(np.sum((vector - self.topic_vectors[i]) ** 2, 0))
            if distance < max(min_dists):
                i_md = min_dists.index(max(min_dists))
                min_dists[i_md] = distance
                min_indices[i_md] = i
        posts = ["@" + str(self.index2nr[i]) for i in min_indices]
        posts.remove("@" + str(number))  # no need to recommend the same post
        return posts

    def topic_of(self, document):
        topics = [topic for index, topic in self.lda_model[self.corpus[document]]]
        return topics.index(max(topics))

    def get_topic_vectors(self):
        docs_topics = []
        for i in range(len(self.corpus)):
            doc_topics = self.lda_model.get_document_topics(self.corpus[i])
            if len(doc_topics) == self.num_topics:
                docs_topics.append(
                    np.array([component[1] for component in doc_topics]))
            else:
                # pad missing topics with zeros so every vector has num_topics components
                topics = []
                d_pop = doc_topics.pop(-1)
                for j in range(self.num_topics - 1, -1, -1):
                    if j != d_pop[0] or len(doc_topics) == 0:
                        topics.append(0)
                    else:
                        topics.append(d_pop[1])
                        d_pop = doc_topics.pop(-1)
                topics.reverse()
                docs_topics.append(np.array(topics))
        self.topic_vectors = docs_topics

    def recommend_with_mlt(self, post, score_limit=0):
        latest_state = post["history"][0]
        # Could weight by post likelihood later
        search_text = latest_state["subject"] + latest_state["content"]
        # maybe make this static later
        mlt_query = {
            "more_like_this": {
                "fields": ["subject", "content", "i_answer", "s_answer"],
                "like": search_text,
                "min_term_freq": 1,
                "min_doc_freq": 1,
                "max_query_terms": 50,
                # "term_vector": "with_positions_offsets"
            }
        }
        recommendation = []
        docs = self.s.query(mlt_query).execute()  # was a bare `s` in the original
        for doc in docs:
            if (int(doc.meta["id"]) != int(post["nr"])
                    and doc.meta["score"] > score_limit):
                recommendation.append((doc.meta["id"], doc.meta["score"]))
        return recommendation

    def change_network(self, network_id):
        self.network = self.p.network(network_id)
        self.s = Search(using=self.es, index=self.network_name)
        self.es_index_name = self.network_name  # was the undefined `network_name`


def main():
    if len(sys.argv) != 5:
        print("Usage: python3 cave_bot.py username password network_id network_name")
        return  # the original fell through and crashed on bad arguments
    bot = CaveBot(*sys.argv[1:])
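# Standalone sketch of the same more-like-this lookup, reusing the modules
# imported above and assuming an index already built by
# index_all_with_recommendations() (the index name "cs3169" mirrors the file
# name used above and is an assumption):
def similar_posts(subject, content, index="cs3169", score_limit=0):
    s = Search(using=Elasticsearch(), index=index)
    docs = s.query("more_like_this",
                   fields=["subject", "content", "i_answer", "s_answer"],
                   like=subject + content,
                   min_term_freq=1, min_doc_freq=1).execute()
    return [(d.meta.id, d.meta.score) for d in docs if d.meta.score > score_limit]


if __name__ == "__main__":
    main()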
import json
import os
from html.parser import HTMLParser

from piazza_api import Piazza


class MLStripper(HTMLParser):
    # Reconstructed head of the class: the snippet began mid-class, and this
    # __init__ is the conventional one implied by the methods below.
    def __init__(self):
        super().__init__()
        self.reset()
        self.fed = []

    def handle_data(self, d):
        self.fed.append(d)

    def get_data(self):
        return ''.join(self.fed)


def strip_tags(html):
    s = MLStripper()
    s.feed(html)
    return s.get_data()


p = Piazza()
p.user_login('*****@*****.**', 'thomaslau')

f = open('userProfile.txt', 'w')
json.dump(p.get_user_profile(), f)
f.close()

rawUserData = open('userProfile.txt')
jsonUserData = json.load(rawUserData)
rawUserData.close()

masterPath = os.getcwd()

for i in jsonUserData["all_classes"]:
    classConnection = p.network(i)
    posts = classConnection.iter_all_posts(limit=100000)
    className = jsonUserData["all_classes"][i]["num"]
    if className == "CS 103":
        x = 0
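# A hedged helper showing where strip_tags fits: pulling the latest revision
# text out of a piazza-api post dict (the 'history'/'content' fields match
# those used elsewhere in these snippets):
def clean_post_text(post):
    """Return the latest content of a post with HTML tags removed."""
    return strip_tags(post['history'][0]['content'])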
import re
import time
import urllib

import ideone
from bs4 import BeautifulSoup
from piazza_api import Piazza
from piazza_api.network import UnreadFilter  # import path assumed

# PIAZZA_USER, PIAZZA_PASS, IDEONE_USER, IDEONE_PASS, LANG_ALIASES,
# LINE_LIMIT, FOOTER and the *_ERROR_TEXT strings are module-level
# configuration defined elsewhere in the original project.


class PiazzaCompileBot(object):

    def __init__(self):
        self.p = Piazza()
        self.p.user_login(PIAZZA_USER, PIAZZA_PASS)
        self.uid = self.p.get_user_profile()['user_id']
        classes = self.p.get_user_classes()
        self.classes = []
        print 'Now watching posts for the following {0} classes:'.format(len(classes))
        for c in classes:
            print '{0} ({1}, {2})'.format(c['num'], c['name'], c['term'])
            self.classes.append(self.p.network(c['nid']))
        self.filter = UnreadFilter()

    def check(self):
        for c in self.classes:
            # ensure we go through the entire feed if there are more posts to read
            feed = {'more': True}
            while feed['more']:
                # filter for only updated posts
                feed = c.get_filtered_feed(self.filter)
                for feed_post in feed['feed']:
                    # get the post number and retrieve the post
                    post = c.get_post(feed_post['nr'])
                    if self.already_compiled(post):
                        print 'Post %s already compiled' % post['id']
                        break
                    post_text = post['history'][0]['content']
                    print 'Checking post %s for code' % post['id']
                    # parse the text in the post; example text:
                    #   <p></p><p>I'm having an issue, please help!</p>
                    #   <p>CompileBot! python</p>
                    #   <pre>def __init__(self):
                    #       print 'blah'</pre>
                    #   <p>Input:</p>
                    #   <pre>blah</pre>
                    soup = BeautifulSoup(post_text.replace("<br />", "\n"))
                    # Look for p tags
                    tags = soup.find_all('p')
                    for tag in tags:
                        try:
                            m = None if not tag.contents else re.search(
                                r'(?i)CompileBot[.?;:!]*\s*(?P<args>.*)\s*',
                                tag.contents[0])
                            if m is not None and tag.next_sibling and tag.next_sibling.next_sibling:
                                # look for code
                                code = None
                                cur_tag = tag.next_sibling.next_sibling
                                if cur_tag and cur_tag.name == 'pre':
                                    code = cur_tag.contents[0]
                                # look for optional stdin
                                if code is not None:
                                    stdin = ''
                                    if cur_tag.next_sibling and cur_tag.next_sibling.next_sibling:
                                        cur_tag = cur_tag.next_sibling.next_sibling
                                        try:
                                            if cur_tag.name == 'p' and bool(re.match('input', cur_tag.contents[0], re.I)) and cur_tag.next_sibling:
                                                cur_tag = cur_tag.next_sibling.next_sibling
                                                if cur_tag and cur_tag.name == 'pre':
                                                    stdin = cur_tag.contents[0]
                                                    cur_tag = cur_tag.next_sibling
                                        except Exception:
                                            pass
                                    code = urllib.unquote(code)
                                    stdin = urllib.unquote(stdin)
                                    try:
                                        lang, opts = m.group('args').split(' -', 1)
                                        opts = ('-' + opts).split()
                                    except ValueError:
                                        # No additional opts found
                                        lang, opts = m.group('args'), []
                                    lang = lang.strip()
                                    print 'Attempting compile for post {0}: language={1}, args={2}'.format(post['id'], lang, opts)
                                    try:
                                        details = self.compile(code, lang, stdin=stdin)
                                        print "Compiled ideone submission {link} for comment {id}".format(link=details['link'], id=post['id'])
                                        # The ideone submission result value indicates the final
                                        # state of the program. If the program compiled and ran
                                        # successfully the result is 15. Other codes indicate
                                        # various errors.
                                        result_code = details['result']
                                        # The user is alerted of any errors via message reply unless
                                        # they include an option to include errors in the reply.
                                        if result_code in [11, 12, 15]:
                                            text = self.format_reply(details, opts)
                                            ideone_link = "http://ideone.com/{}".format(details['link'])
                                            text += "Ideone link: %s" % ideone_link
                                            print 'Compilation success!\n%s' % text
                                            c.add_followup(post['id'], text)
                                            print 'Posted results to Piazza'
                                        else:
                                            error_text = {
                                                11: COMPILE_ERROR_TEXT,
                                                12: RUNTIME_ERROR_TEXT,
                                                13: TIMEOUT_ERROR_TEXT,
                                                17: MEMORY_ERROR_TEXT,
                                                19: ILLEGAL_ERROR_TEXT,
                                                20: INTERNAL_ERROR_TEXT
                                            }.get(result_code, '')
                                            # Include any output from the submission in the reply.
                                            if details['cmpinfo']:
                                                error_text += "Compiler Output:\n{}\n".format(
                                                    self.code_block(details['cmpinfo']))
                                            if details['output']:
                                                error_text += "Output:\n{}\n".format(
                                                    self.code_block(details['output']))
                                            if details['stderr']:
                                                error_text += "Error Output:\n{}\n".format(
                                                    self.code_block(details['stderr']))
                                            print 'Error: %s' % error_text
                                    except ideone.IdeoneError as e:
                                        c.add_followup(post['id'], 'An Ideone error occurred.\n%s' % self.code_block(e))
                                        print e
                        except ValueError as e:
                            import traceback, os.path, sys
                            top = traceback.extract_tb(sys.exc_info()[2])[-1]
                            print 'Parse failed: {0}'.format(', '.join([type(e).__name__, os.path.basename(top[0]), str(top[1])]))

    def already_compiled(self, post):
        children = post['children']
        for child in children:
            if child['uid'] == self.uid:
                return True
        return False

    def compile(self, source, lang, stdin=''):
        """Compile and evaluate source code using the ideone API and return a
        dict containing the output details.

        Keyword arguments:
        source -- a string containing source code to be compiled and evaluated
        lang -- the programming language pertaining to the source code
        stdin -- optional "standard input" for the program

        >>> d = compile('print("Hello World")', 'python')
        >>> d['output']
        Hello World
        """
        lang = LANG_ALIASES.get(lang.lower(), lang)
        # Log in to ideone and create a submission
        i = ideone.Ideone(IDEONE_USER, IDEONE_PASS)
        sub = i.create_submission(source, language_name=lang, std_input=stdin)
        sub_link = sub['link']
        details = i.submission_details(sub_link)
        # The status of the submission indicates whether or not the source has
        # finished executing. A status of 0 indicates the submission is finished.
        while details['status'] != 0:
            details = i.submission_details(sub_link)
            time.sleep(3)
        details['link'] = sub_link
        return details

    def format_reply(self, details, opts):
        """Return a reply that contains the output from an ideone submission's
        details along with optional additional information.
        """
        head, body, extra = '', '', ''
        # Combine information that will go before the output.
        if '--source' in opts:
            head += 'Source:\n{}\n\n'.format(self.code_block(details['source']))
        if '--input' in opts:
            head += 'Input:\n{}\n\n'.format(self.code_block(details['input']))
        # Combine program output and runtime error output.
        output = details['output'] + details['stderr']
        # Truncate the output if it contains an excessive
        # amount of line breaks or if it is too long.
        if output.count('\n') > LINE_LIMIT:
            lines = output.split('\n')
            # If the message contains an excessive amount of duplicate lines,
            # truncate to a small amount of lines to discourage spamming
            if len(set(lines)) < 5:
                lines_allowed = 2
            else:
                lines_allowed = 51
            output = '\n'.join(lines[:lines_allowed])
            output += "\n..."
        # Truncate the output if it is too long.
        if len(output) > 8000:
            output = output[:8000] + '\n ...\n'
        body += 'Output:\n{}\n\n'.format(self.code_block(output))
        if details['cmpinfo']:
            body += 'Compiler Info:\n{}\n\n'.format(details['cmpinfo'])
        # Combine extra runtime information.
        if '--date' in opts:
            extra += "Date: {}\n\n".format(details['date'])
        if '--memory' in opts:
            extra += "Memory Usage: {} bytes\n\n".format(details['memory'])
        if '--time' in opts:
            extra += "Execution Time: {} seconds\n\n".format(details['time'])
        if '--version' in opts:
            extra += "Version: {}\n\n".format(details['langVersion'])
        # To ensure the reply is less than 10000 characters long, shorten
        # sections of the reply until they are of adequate length; sections
        # with less priority are shortened before others. (The original
        # reassigned the loop variable, which never shortened the final reply;
        # collect the trimmed sections and unpack them instead.)
        total_len = 0
        trimmed = []
        for section in (FOOTER, body, head, extra):
            if len(section) + total_len > 9800:
                section = section[:9800 - total_len] + '\n...\n'
            total_len += len(section)
            trimmed.append(section)
        _, body, head, extra = trimmed
        reply_text = head + body + extra
        return reply_text

    def code_block(self, output):
        return "<pre>{0}</pre>".format(output)
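# A minimal polling driver, assuming the PIAZZA_*/IDEONE_* credentials and the
# *_ERROR_TEXT constants are defined at module level as the class expects:
if __name__ == '__main__':
    bot = PiazzaCompileBot()
    while True:
        bot.check()
        time.sleep(60)  # poll the watched class feeds once a minute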
import pickle

import pandas as pd
import torch
from piazza_api import Piazza

# project-local modules (import paths assumed)
from mongo_db_manager import MongoDBManger
from bert_semantic_search import BertSemanticSearch


class PiazzaBot(object):
    def __init__(self, user, password, class_id, corpus=None,
                 corpus_embeddings=None, default_bert=True):
        self.p = Piazza()
        self.p.user_login(user, password)
        self.class_id = class_id
        self.user_profile = self.p.get_user_profile()
        self.network = self.p.network(class_id)
        self.DB_manger = MongoDBManger()
        self.bert = BertSemanticSearch(corpus, corpus_embeddings, default_bert)
        self.parallel_cid_list = []

    def heart_beat(self):
        """
        Trigger the heart beat code, which processes all new posts, puts their
        data into the db, and makes new postings and suggestions for posts.
        :return: NA
        """
        posts = self.network.iter_all_posts()
        for post in posts:
            try:
                cid = post["id"]
                query = {"cid": cid}
                result = self.DB_manger.find(query)
                db_dict = self.create_db_dict(post, result)
                # TODO: remove HTML tags
                if result is None and db_dict is not None:
                    self.DB_manger.insert(db_dict)
                    if not db_dict["is_marked"]:
                        self.create_piazza_bot_follow_up(
                            cid, "Piazza Bot is trying to process this post")
                        self.make_private(db_dict)
                elif db_dict is not None:
                    if (not db_dict["is_processed"] and db_dict["is_marked"]
                            and len(self.parallel_cid_list) != 0):
                        self.make_piazza_suggestions(db_dict, cid)
                    elif not db_dict["is_marked"]:
                        self.create_piazza_bot_follow_up(
                            cid, "Piazza Bot is trying to process this post")
                        self.make_private(db_dict)
                        self.DB_manger.insert_update(query, db_dict)
                    # update the value in the db if the change_log or history has changed
                    if (db_dict["change_log_len"] > result["change_log_len"]
                            or db_dict["revision"] > result["revision"]):
                        self.DB_manger.insert_update(query, db_dict)
            except KeyError:
                print("no cid")

    def generate_embeddings(self):
        """
        Generate the embeddings for all the current posts in the database.
        :return: NA
        """
        docs = self.DB_manger.get_all()
        if docs is None:
            return 1
        corpus = []
        corpus_embeddings = []
        parallel_cid_list_local = []
        for doc in docs:
            corpus.append(doc["content"])
            corpus_embeddings.append(pickle.loads(doc["encoding"]))
            parallel_cid_list_local.append(doc["cid"])
        # turn the list of loaded tensors into a single tensor
        corpus_embeddings = [torch.unsqueeze(t, dim=0) for t in corpus_embeddings]
        corpus_embeddings = torch.cat(corpus_embeddings, dim=0)
        self.bert.set_corpus(corpus)
        self.bert.set_corpus_embeddings(corpus_embeddings)
        self.parallel_cid_list = parallel_cid_list_local

    def create_db_dict(self, post, old_post, tensor=True):
        """
        Process a post's json data into a dict we can put into the db.
        :param post: the new post json data
        :param old_post: old db value for the current post
        :return: post dict formatted for the DB, or None on failure
        """
        try:
            cid = post["id"]
            history = post["history"]
            change_log_len = len(post["change_log"])
            revision = len(history)
            cur_post_content = history[-1]
            uid = self.find_uid(cur_post_content)
            if "gd6v7134AUa" == uid:  # skip posts last edited by this hard-coded uid (the bot account)
                return None
            post_type = post["type"]
            post_folders = post['folders']
            post_subject = cur_post_content['subject']
            post_content = cur_post_content['content']
            is_marked_by_pb, is_processed, mark_id = self.is_marked_by_piazza_bot(
                post["children"], old_post)
            new_value = {
                "cid": cid,
                "revision": revision,
                "change_log_len": change_log_len,
                "uid": uid,
                "type": post_type,
                "folders": post_folders,
                "subject": post_subject,
                "content": post_content,
                "is_marked": is_marked_by_pb,
                "mark_id": mark_id,
                "is_processed": is_processed
            }
            # generate a new embedding if this is the first time this post is
            # added to the db or if there was a content update
            if tensor and (old_post is None or revision > old_post["revision"]):
                new_value["encoding"] = pickle.dumps(
                    self.bert.encode_content(post_content))
            return new_value
        except KeyError as e:
            print(e)
            return None

    def is_marked_by_piazza_bot(self, children, old_post):
        """
        Figure out if the current post has been marked by the bot and
        processed; if it has been marked, also get the cid of the marking
        follow-up.
        :param children: current children posts (follow-ups) for the current post
        :param old_post: old db value for the current post
        :return: is_marked, is_processed, cid of the marking follow-up
        """
        if len(children) == 0:
            return False, False, "None"
        for follow_up in children:
            if follow_up['type'] == "i_answer":
                return True, True, "None"
            subject = follow_up['subject']
            if subject == "Piazza Bot is trying to process this post":
                return True, False, follow_up['id']
            elif subject == "Piazza Bot Has Processed this post":
                return True, True, follow_up['id']
            elif len(subject) > 24 and subject[:24] == '<p><b>Piazza Bot</b></p>':
                return True, False, follow_up['id']
        if old_post is not None and old_post["is_marked"]:
            return True, True, old_post["mark_id"]
        return False, False, "None"

    def make_private(self, db_dict):
        """
        Make the post associated with the current db dict object private.
        :param db_dict: db dict object of the post we want to make private
        :return: 1 if successful else 0
        """
        try:
            if "gd6v7134AUa" != db_dict["uid"]:
                self.update_post(db_dict["cid"], db_dict["type"],
                                 db_dict["revision"], db_dict["folders"],
                                 db_dict["subject"], db_dict["content"], False)
            return 1
        except KeyError:
            return 0

    def make_suggestion_string(self, cur_cid, post_cid):
        link = '<p><a href="https://piazza.com/class/kg9odngyfny6s9?cid={}" target="_blank" rel="noopener">Potential Duplicate of @{}</a></p>'.format(
            cur_cid, cur_cid)
        mark_dup = '<p><a href="http://127.0.0.1:5000/api/dup/{}/{}" target="_blank" rel="noopener">Mark Current Post as Duplicate of @{}</a>'.format(
            post_cid, cur_cid, cur_cid)
        mark_followup = 'or <a href="http://127.0.0.1:5000/api/followup/{}/{}" target="_blank" rel="noopener">Mark Current Post as Follow up of @{}</a></p>'.format(
            post_cid, cur_cid, cur_cid)
        return link + mark_dup + mark_followup

    def make_piazza_suggestions(self, db_dict, cid):
        # TODO: add getting-suggestions code
        msg = '<p><b>Piazza Bot</b></p><p><a href="http://127.0.0.1:5000/api/post/{}" target="_blank" rel="noopener">Make Post Public</a></p>'.format(
            cid)
        try:
            if "gd6v7134AUa" != db_dict["uid"]:
                topk_idxs = self.bert.single_semantic_search(
                    db_dict["content"], top_k=3)
                topk_cids = [self.parallel_cid_list[idx] for idx in topk_idxs]
                for dup_cid in topk_cids:
                    if dup_cid != cid:
                        msg += self.make_suggestion_string(dup_cid, cid)
                self.update_follow_up(db_dict["mark_id"], msg)
            return 1
        except KeyError:
            return 0

    def find_uid(self, cur_post_content):
        """
        Find the uid from the latest post history (content).
        :param cur_post_content: the content params for the post we are working on
        :return: the uid of the user who made the last edit on this post
        """
        try:
            uid = cur_post_content["uid"]
        except KeyError:
            uid = ""
        return uid

    def create_post(self, post_folders, post_subject, post_content,
                    post_type="question", is_announcement=0, bypass_email=0,
                    anonymous=False):
        """
        For simulating asking a question on Piazza. See the Piazza package for
        full documentation.

        NOTE: post_folders is a list of strings, not a single string
        """
        info = self.network.create_post(post_type=post_type,
                                        post_folders=post_folders,
                                        post_subject=post_subject,
                                        post_content=post_content,
                                        is_announcement=is_announcement,
                                        bypass_email=bypass_email,
                                        anonymous=anonymous)
        return info

    def update_post(self, cid, post_type, revision, post_folders, post_subject,
                    post_content, visibility_all=True):
        """Update a post.

        :param cid: cid of the post we want to update
        :param post_type: the type we want to change the post to: "note", "question" or "poll"
        :param revision: current revision number of the post
        :param post_folders: folders the post belongs to
        :param post_subject: post subject line
        :param post_content: post body
        :param visibility_all: if False, restrict visibility to the instructors and original poster
        :return: if the post update was successful
        """
        params = {
            "cid": cid,
            "subject": post_subject,
            "content": post_content,
            "folders": post_folders,
            "type": post_type,
            "revision": revision,
            "visibility": "all" if visibility_all else "private"
        }
        return self.network._rpc.content_update(params)

    def create_piazza_bot_follow_up(self, cid, content, ionly=False):
        """Create a follow-up on a post.

        :param cid: cid of the post we want to add this follow-up to
        :param content: content of the follow-up post
        :param ionly: make the follow-up visible to instructors only
        :return: if the follow-up was created
        """
        params = {
            "cid": cid,
            "type": "followup",
            "subject": content,
            "content": "",
        }
        if ionly:
            params["config"] = {"ionly": True}  # the original's trailing comma made this a tuple
        return self.network._rpc.content_create(params)

    def update_follow_up(self, followup_post, content):
        """Update a follow-up on a post.

        :param followup_post: json of the follow-up post
        :param content: new content of the follow-up post
        :return: if the follow-up post was successfully updated
        """
        self.network.update_post(followup_post, content)

    def get_post(self, cid):
        """
        Retrieve data for a certain post.
        :param cid: cid of the post you want to retrieve data for
        :return: the post data
        """
        return self.network.get_post(cid)

    def get_post_from_db(self, cid):
        """
        Retrieve data from the db for a certain post.
        :param cid: cid of the post you want to retrieve data for
        :return: Mongo result object
        """
        query = {"cid": cid}
        return self.DB_manger.find(query)

    def mark_as_duplicate(self, duplicated_cid, master_cid,
                          msg='Piazza bot found this Duplicate'):
        """
        Mark the given post as a duplicate of another.
        :param duplicated_cid: cid of the post you want to mark as duplicate
        :param master_cid: cid of the post you want to put the duplicate under
        :param msg: msg for why the post is marked as a duplicate
        :return: if the duplicate mark request was successful
        """
        self.network.mark_as_duplicate(duplicated_cid, master_cid, msg)

    def delete_post(self, cid):
        """
        Delete a post from Piazza.
        :param cid: cid of the post you want to delete
        :return: if the delete request was successful
        """
        self.network.delete_post(cid)

    def delete_post_db(self, cid):
        """
        Delete a post from the db.
        :param cid: cid of the post you want to delete
        :return: Mongo result object
        """
        return self.DB_manger.del_by_cid(cid)

    def get_piazza_suggestions(self, query):
        params = {"nid": self.class_id, "query": query}
        r = self.network._rpc.request(method="network.find_similar", data=params)
        return self.network._rpc._handle_error(
            r, "Could not get suggestions {}.".format(repr(params)))

    def get_full_piazza(self):
        posts = self.network.iter_all_posts()
        # column names must match the keys used in new_row below
        # (the original listed "match 1" etc., which never matched)
        dataframe_cols = ["cid", "content", "match 1 cid", "match 2 cid", "match 3 cid"]
        dataframe = pd.DataFrame(columns=dataframe_cols)
        for post in posts:
            db_dict = self.create_db_dict(post, None, False)
            if db_dict is None:
                continue
            content = db_dict["content"]
            result = self.get_piazza_suggestions(content)
            cid = db_dict["cid"]
            try:
                suggestions = result["list"]
            except KeyError:
                continue
            counter = 0
            new_row = {
                "cid": cid,
                "content": content,
                "match 1 cid": "None",
                "match 2 cid": "None",
                "match 3 cid": "None"
            }
            for suggestion in suggestions:
                if suggestion['id'] != cid:
                    if counter == 0:
                        new_row["match 1 cid"] = suggestion['id']
                    elif counter == 1:
                        new_row["match 2 cid"] = suggestion['id']
                    elif counter == 2:
                        new_row["match 3 cid"] = suggestion['id']
                        break
                    counter += 1
            dataframe = dataframe.append(new_row, ignore_index=True)
        dataframe.to_csv(
            r"C:\Users\sohai\Documents\Uni 2020\csc392\piazzabot\data\paizza_api_matchs.csv")
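# Hedged wiring example: the credentials are placeholders, the network ID is
# the one that appears in make_suggestion_string above, and
# generate_embeddings() must run before heart_beat() can make suggestions
# (parallel_cid_list stays empty until then).
if __name__ == "__main__":
    bot = PiazzaBot("user@example.com", "password", "kg9odngyfny6s9")
    bot.generate_embeddings()
    bot.heart_beat()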
import re

import pandas as pd
from django.shortcuts import get_object_or_404, render
from piazza_api import Piazza

# app-local model import (path assumed)
from .models import Information


def results(request, information_id):
    information = get_object_or_404(Information, pk=information_id)

    # the "backend" work happens here
    p = Piazza()
    p.user_login(email=information.email, password=information.password)
    user_profile = p.get_user_profile()
    course = p.network(information.course_ID)

    search_words = [x.strip() for x in information.keywords.split(',')]

    start_post = information.last_CID
    end_post = max(0, start_post - 300)
    n = 50
    # dictionary in the form {cid: message content + ' ' + subject content}
    cids_to_content = {}

    # Return object
    result = {}
    # Format for result:
    # {
    #     word: {
    #         average_sentiment: float,
    #         proportion_posts: float,
    #     }
    # }
    for word in search_words:
        result[word] = {
            'total_sentiment': 0.0,
            'average_sentiment': 0.0,
            'number_posts': 0,
            'proportion_posts': 0.0
        }

    # helper: count keyword occurrences in a string
    def contains(s, word_list):
        rtn = {keyword: 0 for keyword in word_list}
        for w in s.split():
            if w in word_list:
                rtn[w] += 1
        for key, value in sorted(rtn.items(), key=lambda x: -x[1]):
            print("{}: {}".format(key, value))
        return rtn

    # TODO: search in a time range instead of a fixed cid window
    for current_post in range(start_post, end_post, -1):
        try:
            post = course.get_post(current_post)
            history = post['history']
            subject = history[0]['subject']
            content = history[0]['content']
            entry = re.sub("<.*?>", "", content + ' ' + subject)
            entry = re.sub("&quot;", "", entry)  # this entity was garbled in the original
            cids_to_content[str(current_post)] = entry  # add post id: content to the dictionary
            n += 1
        except Exception:
            continue

    all_messages_df = pd.DataFrame({
        'cids': list(cids_to_content.keys()),
        'content': list(cids_to_content.values())
    })

    # read the VADER sentiment lexicon into a dataframe;
    # there should be one column: the polarities
    vader_path = "/Users/EdmundTian/Desktop/Projects/piazzaSentimentAnalysis/SodaHacks/mysite/vader_lexicon.txt"
    lex_polarities = pd.read_table(vader_path, header=None,
                                   delim_whitespace=True, usecols=[0, 1])
    lex_polarities.set_index(0, inplace=True)
    lex_polarities.columns = ['polarity']
    lex_polarities['polarity'] = pd.to_numeric(lex_polarities['polarity'],
                                               errors='coerce')

    # Use the lexicon to calculate the overall sentiment of each post: the
    # total sentiment of one post is the sum of the sentiments of its words.
    # Clean the text first: lowercase the message + subject to match the
    # lowercase lexicon, and replace all punctuation with a single space.
    all_messages_df['lower_content'] = [
        text.lower() for text in all_messages_df['content']
    ]
    punct_re = r'[^ \t\n\r\f\va-zA-Z0-9_]'
    all_messages_df['no_punc'] = [
        re.sub(punct_re, " ", text) for text in all_messages_df['lower_content']
    ]

    # convert content into a tidy format to make sentiments easy to calculate;
    # the index is the cid of the post
    tidy_format = []
    for text, cid in zip(all_messages_df['no_punc'], all_messages_df['cids']):
        for i, word in enumerate(text.split()):
            tidy_format.append({'index': cid, 'num': i, 'word': word})
    tidy_format = pd.DataFrame.from_dict(tidy_format)
    tidy_format.set_index('index', inplace=True)

    # find the sentiment of each post by joining with the lexicon table
    merged = tidy_format.merge(lex_polarities, how='left',
                               left_on='word', right_index=True)
    merged.sort_index(inplace=True)
    merged = merged.fillna(0.0)  # the original discarded fillna's return value
    grouped = merged.groupby('index')['polarity'].sum()
    all_messages_df['polarity'] = grouped.values

    for index, row in all_messages_df.iterrows():
        for search_word in search_words:
            if search_word in row['no_punc']:
                result[search_word]['total_sentiment'] += row['polarity']
                result[search_word]['number_posts'] += 1

    for key in result.keys():
        if result[key]['number_posts'] != 0:
            result[key]['average_sentiment'] = (
                float(result[key]['total_sentiment'])
                / float(result[key]['number_posts']))
        if n != 0:
            result[key]['proportion_posts'] = (
                float(result[key]['number_posts']) / float(n))

    result = pd.DataFrame.from_dict(result, orient='index')
    result = result.drop('total_sentiment', axis=1)
    result = result.sort_values('average_sentiment')
    result = result.to_html()

    return render(request, 'piazzapolls/results.html', {
        'information': information,
        'result': result
    })
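# For context, a hedged sketch of the URL wiring this view implies; the app
# and route names are assumptions inferred from the 'piazzapolls/results.html'
# template path above, and this would live in piazzapolls/urls.py:
#
#     from django.urls import path
#     from . import views
#
#     urlpatterns = [
#         path('<int:information_id>/results/', views.results, name='results'),
#     ]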