class PiazzaScraper:
    """Thin convenience wrapper around a single Piazza classroom.

    The class code is the trailing path segment of the classroom URL,
    e.g. the ``k12qbt838di4xt`` in https://piazza.com/class/k12qbt838di4xt.
    """

    def __init__(self, class_code, username, password):
        # Authenticate once and keep a handle on the classroom network.
        self.piazza = Piazza()
        self.piazza.user_login(username, password)
        self.piazza_class = self.piazza.network(class_code)

    def get_all_posts(self):
        """Return a list of dicts, one per thread in the classroom feed."""
        feed = self.piazza_class.get_feed(limit=999999, offset=0)
        return [
            self.piazza_class.get_post(entry['id'])
            for entry in feed["feed"]
        ]

    def get_post(self, thread_id):
        """Return the dict describing a single thread."""
        return self.piazza_class.get_post(thread_id)

    def get_users(self):
        """Return a list of profile dicts for every user in the class."""
        return self.piazza_class.get_all_users()

    def get_user(self, user_id):
        """Return the profile of a single Piazza user."""
        return self.piazza_class.get_users(user_id)
def checkPiazza():
    """Sync new Piazza posts into MongoDB and forward each new one via sendPayload().

    NOTE(review): the dedup key is the post *subject* (stored in Mongo as 'ID'),
    so two posts sharing a subject are treated as duplicates — confirm intended.
    """
    client = pymongo.MongoClient(pizzapizzasecret.dbsecret)
    db = client.get_database('piazza-posts')
    table = db.posts
    existingPosts = []
    query = table.find()
    print(query)
    output = {}
    i = 0
    # Collect the subjects already stored in Mongo (the 'ID' field).
    for x in query:
        output[i] = x
        output[i].pop('_id')
        existingPosts.append(output[i]['ID'])
        i += 1
    print(existingPosts)
    # Log into Piazza and walk every post of the configured network.
    p = Piazza()
    p.user_login(pizzapizzasecret.email, pizzapizzasecret.password)
    ds = p.network(pizzapizzasecret.net)
    #print("ds", ds.iter_all_posts())
    posts = ds.iter_all_posts()
    for post in posts:
        #print("post", post)
        # history[0] is the latest revision of the post.
        if post['history'][0]['subject'] not in existingPosts:
            payload = post['history'][0]['subject']
            print(payload)
            queryObject = {
                'ID': payload,
            }
            queryMongo = table.insert_one(queryObject)
            sendPayload(post)
        else:
            # NOTE(review): printed once per already-known post, not once overall.
            print("piazza channel up to date")
    return "success"
def create_piazza_post(title: str, content: str, folder: str) -> int:
    """Create a Piazza post and return the post's CID (post number).

    Retries for as long as Piazza's rate limiter rejects the request;
    any other RequestError propagates to the caller.
    """
    EMAIL = Config.get_global().piazza_email
    PWD = Config.get_global().piazza_password
    ID = Config.get_global().piazza_id
    p = Piazza()
    p.user_login(EMAIL, PWD)
    post = p.network(ID)
    while True:
        try:
            result = post.create_post(
                title=title, content=content, nid=ID, folder=folder
            )
        except piazza_api.exceptions.RequestError as e:
            # Piazza has posting time limits:
            # "Sorry! It looks like you are posting too quickly--wait a second and try again."
            if "posting too quickly" in str(e):
                time.sleep(0.4)
                continue
            # Not ours to handle. BUG FIX: use bare `raise` instead of
            # `raise e` so the original traceback is preserved.
            raise
        # Post accepted.
        break
    return int(result['nr'])
def post_aggregate(email, password, coursecode):
    """Collect the HTML-stripped question text of every post in a course.

    Logs in with the given credentials, resolves ``coursecode`` to its
    Piazza network hash, and returns a list of plain-text question bodies.
    """
    piazza = Piazza()
    piazza.user_login(email, password)
    # Map the human-readable course code to Piazza's internal network id.
    code_to_hash = course_hash(email, password)
    classroom = piazza.network(code_to_hash[coursecode])
    # iter_all_posts is a lazy generator; the comprehension drains it.
    return [
        strip_tags(entry["history"][0]["content"])
        for entry in classroom.iter_all_posts(limit=const_limit)
    ]
def perform_action(action, course, as_staff=False, is_test=None, kwargs=None):
    """Look up Piazza credentials for ``course`` and invoke ``action`` on its network.

    Returns whatever the piazza-api network method returns, or a
    ``(message, 400)`` tuple if the call raises.
    """
    with connect_db() as db:
        # Staff and student accounts live in separate column pairs.
        if as_staff:
            credential_sql = "SELECT staff_user, staff_pw FROM piazza_config WHERE course = (%s)"
        else:
            credential_sql = "SELECT student_user, student_pw FROM piazza_config WHERE course = (%s)"
        user, pw = db(credential_sql, [course]).fetchone()
        # A dedicated test course id can shadow the real one.
        if is_test:
            id_sql = "SELECT test_course_id FROM piazza_config WHERE course = (%s)"
        else:
            id_sql = "SELECT course_id FROM piazza_config WHERE course = (%s)"
        (course_id, ) = db(id_sql, [course]).fetchone()
        p = Piazza()
        p.user_login(user, pw)
        course = p.network(course_id)
        if kwargs is None:
            kwargs = {}
        try:
            return getattr(course, action)(**kwargs)
        except Exception as e:
            # API boundary: surface any failure as an HTTP-ish 400 payload.
            return str(e), 400
def setUp(self):
    """Log into Piazza with the configured account and bind the CIS 121 network."""
    p = Piazza()
    p.user_login(email=config.username, password=config.password)
    # Network id of the classroom exercised by these tests.
    class_id = 'idj5lp6gixc6xn'
    self.cis121 = p.network(class_id)
async def send_new_posts():
    """Fetch the class feed and forward any post not yet recorded in ``sent_ids``."""
    load_sent_ids()
    print('\n\nloaded')
    print(sent_ids)
    print('\n\n')
    piazza = Piazza()
    piazza.user_login("email", "pass")
    # The first network in the user's profile is the course of interest.
    user_status = piazza.get_user_status()
    nid = user_status['networks'][0]['id']
    network = piazza.network(nid)
    entries = network.get_feed(limit=99999)['feed']
    pending = []
    for entry in entries:
        full_post = network.get_post(entry['id'])
        latest = full_post['history'][0]  # most recent revision
        pending.append(
            PiazzaObject(entry['id'], latest['subject'], latest['content'],
                         full_post['created'],
                         'https://piazza.com/class?cid=' + entry['id']))
    print(sent_ids)
    for item in pending:
        if item.id not in sent_ids:
            print(f'sending {item.id}')
            await send_post(item)
            sent_ids.append(item.id)
    save_sent_ids()
def main(argv=sys.argv[1:]):
    """Entry point: log into Piazza interactively and run the Keybase bot."""
    argument_parser = argparse.ArgumentParser()
    # argument_parser.add_argument('--username', type=str,
    #     default='PiazzaBot', help='the bot\'s Keybase username')
    args = argument_parser.parse_args(argv)
    logger.info('keybase_piazza_bot starting')
    event_loop = asyncio.get_event_loop()
    piazza = Piazza()
    piazza.user_login()  # interactive login prompt
    cse220 = piazza.network(PIAZZA_NETWORK_ID)
    keybase_bot = Bot(
        handler=KeybaseBotHandler(piazza, cse220),
        loop=event_loop,
        # username=args.username,
    )
    event_loop.run_until_complete(keybase_bot.start({}))
    event_loop.close()
    logger.info('keybase_piazza_bot exiting')
# NOTE(review): this block arrived garbled — the text between
# "getpass('Piazza Password:'" and "'feed']" (the login call, the network
# binding, and the head of the feed-counting method) was replaced by
# "******" during credential redaction, so the code below is NOT
# syntactically valid as-is. Preserved verbatim for reconstruction against
# the original source. What survives: per-day question counting
# (instructor/student answered vs unanswered), a follow-up counter that
# recurses over each post's 'children' tree summing 'no_answer' flags,
# and a get_count_today convenience wrapper.
class PiazzaWrapper: def __init__(self, course_id='xxxxxxxxxxx'): self.p = Piazza() email_id = input("Enter your Piazza email ID : ") password = getpass('Piazza Password:'******'feed']: if post['type'] == 'question': # print(post['nr']) # print(post['content_snipet']) time = post['log'][0]['t'] time = datetime.datetime.strptime(time[:-1], "%Y-%m-%dT%H:%M:%S") if time.date() == today.date(): count += 1 if 'has_i' in post.keys(): count_i += 1 elif 'has_s' in post.keys(): count_s += 1 else: count_unanswered += 1 unanswered_posts.append(post['nr']) # print(time) return count, count_i, count_s, count_unanswered, unanswered_posts def get_unanswered_followup(self): posts = self.comp_photo19.iter_all_posts() count = 0 for post in posts: cid = post['nr'] content = self.comp_photo19.get_post(cid) count += self.traverse_content_tree(content) return count def traverse_content_tree(self, content): count = 0 if 'children' in content.keys(): if len(content['children']) > 0: for content_children in content['children']: count += self.traverse_content_tree(content_children) if 'no_answer' in content_children.keys(): count += content_children['no_answer'] return count def get_count_today(self): posts = self.comp_photo19.get_feed(100, 0) count, count_i, count_s, count_unanswered, unanswered_posts = self.count_posts( posts, datetime.datetime.today()) return count, count_i, count_s, count_unanswered, unanswered_posts
def main():
    """Prompt for a class and post id, then export users and post data to CSV."""
    piazza = Piazza()
    piazza.user_login()  # interactive prompt
    class_obj = piazza.network(input("Enter your class id"))
    posts = class_obj.get_post(input("Enter post number"))
    all_users = class_obj.get_all_users()
    generate_csv(all_users, posts)
def piazza_reader(email, password, coursecode):
    """Return every question and answer of a course as one space-joined string."""
    piazza = Piazza()
    piazza.user_login(email, password)
    class_dictionary = piazza.get_user_classes()
    # dictionary for 'course code: hash_id'
    coursetohash = {entry['num']: entry['nid'] for entry in class_dictionary}
    print(coursetohash)
    classroom = piazza.network(coursetohash[coursecode])
    postquestions = []
    postanswers = []
    # iter_all_posts is a lazy generator over the whole board.
    for post in classroom.iter_all_posts(limit=const_limit):
        # Latest revision of the question, with HTML tags stripped.
        postquestions.append(strip_tags(post["history"][0]["content"]))
        # Answers (if any) live on the first child thread, one per revision.
        children = post.get("children") or []
        if children and "history" in children[0]:
            for revision in children[0]["history"]:
                # Keep only textual answer revisions.
                if type(revision["content"]) == str:
                    postanswers.append(strip_tags(revision["content"]))
    return " ".join(postquestions + postanswers)
def _connect_to_piazza(self, piazza_credentials):
    """Connect to Piazza and cache the class network, full feed, and instructor ids.

    ``piazza_credentials`` must supply the 'piazza_email', 'piazza_password'
    and 'piazza_network' keys.
    """
    _piazza = Piazza()
    _piazza.user_login(email=piazza_credentials['piazza_email'],
                       password=piazza_credentials['piazza_password'])
    self._myclass = _piazza.network(piazza_credentials['piazza_network'])
    # Cache the list of cid's from the feed (limit large enough for the whole class).
    self._feed = self._myclass.get_feed(limit=999999, offset=0)
    # Instructors are the users flagged as admins.
    # IDIOM FIX: was `user['admin'] == True`; plain truthiness is the
    # PEP 8 form and equivalent for the boolean flag used here.
    self._instructor_ids = [
        user['id'] for user in self._myclass.get_all_users()
        if user['admin']
    ]
def main():
    """CLI entry point: log into Piazza and dump a network's posts as JSON."""
    arg_parser = argparse.ArgumentParser(
        description='Retrieve Piazza posts and answers in JSON format.')
    arg_parser.add_argument('--networkid', '-n', help='piazza network ID', required=True)
    arg_parser.add_argument('--output', '-o', help='output filename', required=True)
    args = arg_parser.parse_args()
    piazza = Piazza()
    print('Login to Piazza:')
    piazza.user_login()  # interactive prompt
    network = piazza.network(args.networkid)
    write_all_posts(network, args.output)
def create_piazza_bot(user_email, user_password, course_code):
    '''
    Build and return a PiazzaBot bound to one course.

    Parameters:
        user_email: Piazza user email to authenticate with
        user_password: Piazza password to authenticate with
        course_code: Class/Course code on Piazza
    '''
    piazza = Piazza()
    piazza.user_login(email=user_email, password=user_password)
    profile = piazza.get_user_profile()
    site = piazza.network(course_code)
    return PiazzaBot(piazza, profile, site)
class Bot:
    """Piazza course bot: downloads/caches posts and creates notes/answers."""

    def __init__(self, course_code=config.eecs281):
        self.piazza = Piazza()
        self.piazza.user_login(config.creds['email'], config.creds['password'])
        self.course = self.piazza.network(course_code)
        # RPC API is needed for operations the high-level API lacks
        # (creating notes and instructor answers).
        self.piazza_rpc = PiazzaRPC(config.class_code)
        self.piazza_rpc.user_login(config.creds['email'], config.creds['password'])

    def get_all_posts_json(self):
        """Return every post as Post objects, caching the raw JSON locally.

        BUG FIX: the cache file was opened in binary mode ('wb') while
        json.dump writes str — a TypeError on Python 3 — and neither file
        handle was ever closed. Both are now text-mode context managers.
        """
        file_name = '{0}.txt'.format(config.class_code)
        posts = []
        if not os.path.isfile(file_name):
            documents = []
            for post in self.course.iter_all_posts(limit=INF):
                print('downloading post {0}'.format(post['nr']))
                documents.append(post)
                posts.append(Post(post))
            with open(file_name, 'w') as cache:
                json.dump(documents, cache)
        else:
            with open(file_name, 'r') as cache:
                data = json.load(cache)
            for post in data:
                posts.append(Post(post))
        return posts

    def get_all_posts(self, start_id=0, limit=100):
        """Download every feed post with id greater than ``start_id``.

        NOTE(review): ``limit`` is accepted but unused (kept for interface
        compatibility) — confirm whether it should cap the result.
        """
        documents = []
        feed = self.course.get_feed()
        ids = [post['nr'] for post in feed['feed']]
        for post_id in ids:
            if post_id > start_id:
                print('downloading post {0}'.format(post_id))
                post_json = self.course.get_post(post_id)
                documents.append(Post(post_json))
        return documents

    def get_post(self, id):
        """Fetch a single post by number."""
        return Post(self.course.get_post(id))

    def create_post(self, subject, body, folder=['hw1']):
        """Create a note in the given folder(s) via the RPC API.

        NOTE(review): mutable default kept for interface compatibility;
        ``folder`` is only read, never mutated.
        """
        params = {'type': 'note', 'subject': subject,
                  'content': body, 'folders': folder}
        self.piazza_rpc.content_create(params)

    def create_answer(self, post_id, content):
        """Post an instructor answer on the given post via the RPC API."""
        params = {
            'cid': post_id,
            'type': 'i_answer',
            'content': content,
            'revision': 0}
        return self.piazza_rpc.content_instructor_answer(params)
class PiazzaBot():
    """Minimal bot that prints lecture-related post bodies for SI 206."""

    def __init__(self):
        self.p = Piazza()
        self.p.user_login(USER_NAME, USER_PASS)
        self.uid = self.p.get_user_profile()['user_id']
        # classes = self.p.get_user_classes()
        self.si206 = self.p.network("jlp6m1ynp9713y")

    def do_bot(self):
        """Search the feed for 'lecture' and print each hit's latest body."""
        for hit in self.si206.search_feed("lecture"):
            post_id = hit[u'id']
            print(self.si206.get_post(post_id)[u'history'][0][u'content'])
            print("\n\n")
def comment_post_aggregate(email, password, coursecode):
    """Return HTML-stripped question and answer text for every post in a course."""
    piazza = Piazza()
    piazza.user_login(email, password)
    # Resolve the human-readable course code to Piazza's network hash.
    coursetohash = course_hash(email, password)
    classroom = piazza.network(coursetohash[coursecode])
    postquestions = []
    postanswers = []
    # Lazy generator over the whole board.
    for post in classroom.iter_all_posts(limit=const_limit):
        # Latest revision of the question, tags stripped.
        postquestions.append(strip_tags(post["history"][0]["content"]))
        # Answers live on the first child thread, one entry per revision.
        children = post.get("children") or []
        if children and "history" in children[0]:
            for revision in children[0]["history"]:
                # Keep only textual answer revisions.
                if type(revision["content"]) == str:
                    postanswers.append(strip_tags(revision["content"]))
    return postquestions + postanswers
def pubish_to_piazza(message_contents):
    """Extract a recording URL and password from a Gmail message and post
    them to the Piazza 'logistics' folder.

    ``message_contents`` is a Gmail API message resource with a
    base64url-encoded body.
    """
    # Get the meeting URL + meeting password.
    # NOTE(review): .replace("&", "&") is a no-op — it was most likely
    # meant to unescape "&amp;" to "&"; confirm against the original.
    message_body = base64.urlsafe_b64decode(
        message_contents["payload"]["body"]["data"]).decode("utf-8").replace(
            "&", "&")
    # FIX: raw strings for regex patterns — '\s' in a plain string is an
    # invalid escape sequence (SyntaxWarning on Python 3.12+).
    meeting_url = re.findall(r'(https?://[^\s]+)', message_body)[2]
    meeting_password = re.findall(r'Access Password: (.{8})', message_body)[0]
    # Post them to Piazza.
    piazza_message_content = f"Recording URL: {meeting_url}\nMeeting Password: {meeting_password}"
    piazza_client = Piazza()
    piazza_client.user_login(email=PIAZZA_EMAIL, password=PIAZZA_PASSWORD)
    piazza_class = piazza_client.network(PIAZZA_CLASS_NETWORK_ID)
    piazza_class.create_post(post_type="note",
                             post_folders=["logistics"],
                             post_subject="New Recording Available",
                             post_content=piazza_message_content)
    print("Uploaded new recording to Piazza!")
def get_headlines():
    """
    Description: This function will start the process of retrieving piazza
        posts and concatenating them.
    Parameters: None
    Return: The string for alexa to read out
    """
    # Retrieve login information from the local config file.
    with open('config.json') as login_info:
        user_pass_dict = json.load(login_info)
    # Session header identifying this project.
    sess = requests.Session()
    sess.headers.update({'User-Agent': 'I am making a project: shbanki'})
    # Log into Piazza and select the class board.
    p = Piazza()
    p.user_login(user_pass_dict["user"], user_pass_dict["passwd"])
    class_board = p.network("jsux9glwaxm4m")
    number_of_posts = 3
    data = class_board.iter_all_posts(number_of_posts)
    # First post, then the remaining ones with a spoken separator.
    output = concatenate_post(next(data))
    for _ in range(number_of_posts - 1):
        output += 'The next post reads... '
        output += concatenate_post(next(data))
    return output
def main():
    """Wire up Piazza and Slack from environment config, then poll for new posts."""
    conf = config_env()
    # Piazza session.
    piazza = Piazza()
    piazza.user_login(email=conf.PIAZZA_EMAIL, password=conf.PIAZZA_PASSWORD)
    network = piazza.network(conf.PIAZZA_ID)
    # Slack client.
    bot = Slacker(conf.SLACK_TOKEN)
    # Resume from the newest post currently in the feed.
    last_id = get_max_id(network.get_feed()['feed'])
    post_base_url = "https://piazza.com/class/{}?cid=".format(conf.PIAZZA_ID)
    # Blocking poll loop.
    check_for_new_posts(network, bot, conf.SLACK_BOT_NAME, conf.SLACK_CHANNEL,
                        last_id, post_base_url)
def main(piazza_class, output_filename):
    # Python 2 code (`print err` statement) — scrapes every question post of a
    # class and writes title/question/answer/timestamp/tags '@@@'-delimited,
    # one post per line, to output_filename.
    # Use Piazza API (https://github.com/hfaran/piazza-api) to login and fetch all posts for given class
    p = Piazza()
    p.user_login()
    piazza_class = p.network(CLASS_NETWORK_IDS[piazza_class])
    posts = piazza_class.iter_all_posts()
    # NOTE(review): `f` is never closed — use a with-block when porting.
    f = open(output_filename, 'w')
    num_posts = 0
    for post in posts:
        if 'type' in post:
            if unicode2str(post['type']) == 'question':
                title = ''
                question = ''
                answer = ''
                timestamp = unicode2str(post['created'])
                tags = ' '.join(post['tags'])
                # history[0] is the latest revision of the post.
                if post['history']:
                    if post['history'][0]['subject']:
                        title = unicode2str(post['history'][0]['subject'])
                    if post['history'][0]['content']:
                        question = unicode2str(post['history'][0]['content'])
                # The first child thread holds the answer, when present.
                if post['children']:
                    if post['children'][0]:
                        if 'history' in post['children'][0]:
                            answer = unicode2str(
                                post['children'][0]['history'][0]['content'])
                # Print each post as a single line in output file, with parts of posts delimited by '@@@'
                try:
                    f.write(title + '@@@' + question + '@@@' + answer +
                            '@@@' + timestamp + '@@@' + tags + '\n')
                except UnicodeEncodeError as err:
                    # Skip posts whose text can't be encoded; undo the count.
                    print err
                    num_posts -= 1
                num_posts += 1
    print('Scraped %s posts\n' % num_posts)
from piazza_api import Piazza
import json
import sys

# Download every post body of the course whose network id is given as
# argv[1], keyed by post number, and dump the mapping to posts183.json.
p = Piazza()
p.user_login()  # interactive prompt
course = p.network(sys.argv[1])
mapSave = {}
posts = course.iter_all_posts(limit=100000000000)
for post in posts:
    content = post["history"][0]["content"]  # latest revision of the body
    post_nr = post["nr"]  # renamed from `id` to avoid shadowing the builtin
    print(post_nr)
    mapSave[post_nr] = content
# BUG FIX: the file was opened in binary mode ('wb') while json.dumps
# returns str — a TypeError on Python 3. Open in text mode instead.
with open("posts183.json", "w") as f:
    f.write(json.dumps(mapSave))
# NOTE(review): fragment — these statements sit inside an input loop and a
# parsing routine whose beginning and end lie outside this chunk (the bare
# `continue` statements and the dangling `for post_id in cids:` have no
# visible enclosing scope). Preserved verbatim; reformat once the
# surrounding file is available. It prompts for class codes/names, then
# walks each class's feed to build a GEXF interaction graph.
class_code = input('Please enter the course code or enter to start parsing. To find the course code, visit the Piazza class and copy the last part of the URL (i.e for https://piazza.com/class/asdfghjkl, paste in \'asdfghjkl\'): ') if class_code == '': add_another_class = False continue class_name = input('Please enter the class name for this code: ') classes[class_code] = class_name continue_input = input( 'Enter "begin" to start parsing the Piazza networks OR press enter to add another class: ') if continue_input == 'begin': add_another_class = False total_posts = 0 # TODO refactor this procedural logic into a more readable class for class_code, class_name in classes.items(): piazza_class = p.network(class_code) output_file = class_name + '_network.gexf' feed = piazza_class.get_feed(limit=10000) cids = [post['id'] for post in feed["feed"]] edges = dict() nodes = set() node_sizes = dict() node_interactions = dict() post = None rootLogger.info('Parsing class %s with code %s', class_name, class_code) i = 0 for post_id in cids:
import logging
import json

# Python 2 script (print statements): walks up to 100 posts of one course
# and dumps a subject -> URL map to test.txt.
logging.captureWarnings(True)
# NOTE(review): the check uses `> 2` but reads argv[1]; a single argument
# falls through to the "Nothing" default — confirm `>= 2` was intended.
if len(sys.argv) > 2:
    search_query = sys.argv[1]
else:
    search_query = "Nothing"
p = Piazza()
password = ""
# Credentials are read from a local password file (email redacted upstream).
with open("password.txt") as f:
    password = f.read()
p.user_login("*****@*****.**", password)
course = p.network("i4skbzt4mxk3ck")
data = {}
with open("test.txt", "w") as g:
    for i in course.iter_all_posts(limit=100):
        # history[0] is the latest revision; 'nr' is the post number (cid).
        print i.get("history")[0]["subject"]
        print "piazza.com/class/i4skbzt4mxk3ck?cid=%d" % i["nr"]
        data[i.get("history")[0]["subject"]] = "piazza.com/class/i4skbzt4mxk3ck?cid=%d" % i["nr"]
    json.dump(data, g)
# NOTE(review): fragment — depends on names defined outside this chunk
# (`args`, `email`, `classid`, `nsects`, `title`, `active`) and is cut off
# mid-`params` dict, so it is preserved verbatim. It logs into Piazza
# (password=None triggers an interactive prompt) and builds one pinned
# weekly-reflection note per discussion section, scoped to that section's
# feed group.
groupname = args.groupname content = "What chemistry topics were new to you this past week? What was "\ "confusing?\n\n Many students have the same confusions, so take a "\ "look at what your peers have answered. See something you understand "\ "that they don't? Start a discussion! See something you are also "\ "confused about? Tell them why!\n\n Post your response in this thread"\ " before Tuesday's lecture.\n\n #pin" print('Connecting to Piazza via piazza-api...') # Piazza setup p = Piazza() p.user_login(email=email, password=None) me = p.get_user_profile() pclass = p.network(classid) print(' Logged in as: %s' % me.get('name')) print('') for i in range(nsects): if (groupname != ''): thistitle = '{} {}{}'.format(title, groupname, i + 1) disc = groupname + ' ' + str(i + 1) params = { 'status': active, 'type': 'note', 'folders': [disc], 'subject': thistitle, 'content': content, 'config': { 'feed_groups': disc.lower() + '_' + classid
# NOTE(review): CaveBot — a Piazza + Elasticsearch + gensim-LDA
# recommendation bot. This chunk arrived with its indentation collapsed
# and a module-level `main()` interleaved between methods, so the class
# structure cannot be reconstructed safely; preserved verbatim with
# per-section notes for the eventual cleanup.
class CaveBot(): def __init__(self, username, password, network_id, network_name): self.p = Piazza() self.p.user_login(username, password) self.user_profile = self.p.get_user_profile() self.network = self.p.network(network_id) self.network_name = network_name self.es = Elasticsearch() self.s = Search(using=self.es, index=self.network_name) self.jobs = [] self.es_index_name = network_name self.index2nr = None self.nr2index = None self.corpus, self.lda_model, self.id2word, self.cleaned_documents = None, None, None, None self.topic_vectors = None bulk(self.es, self.index_all_with_recommendations()) # For debugging purposes and room for future play, reread every post, put into a .txt file and let lda use that. piazza_class2txt.main(self.network, "cs3169.txt") with open("index2nr.pkl", "rb") as file: self.index2nr = pickle.load(file) with open("nr2index.pkl", "rb") as file: self.nr2index = pickle.load(file) self.num_topics = 3 self.train_iter = 50 self.corpus, self.lda_model, self.id2word, self.cleaned_documents = refined_lda.main( "cs3169.txt", num_topics=self.num_topics, iterations=self.train_iter) self.recommend() def main(): if len(sys.argv) != 5: print( "Usage: python3 cave_bot.py username password network_id network_name" ) bot = CaveBot(*sys.argv[1:]) # Recommends once per post, responds to parent answers only def index_all_with_recommendations(self): for post in self.network.iter_all_posts(limit=None): trigger = False latest_state = post["history"][0] if "!RecommendMe" in latest_state[ "subject"] or "!RecommendMe" in latest_state["content"]: trigger = True # Doesn't look for its children for child in post["children"]: # felt cleaner than child.get if "history" in child: if "!RecommendMe" in child["history"][0]["content"]: trigger = True if "!@#$" in child["history"][0]["content"][:7]: trigger = False break else: if "!RecommendMe" in child["subject"]: trigger = True if "!@#$" in child["subject"][:7]: trigger = False break if trigger: self.jobs.append(post)
# (continuation of index_all_with_recommendations: yields one Elasticsearch
# index action per post with subject/content and both answer types; then
# recommend(), which — in debug mode — prints a topic-based recommendation
# per queued post instead of posting a follow-up.)
i_answer = "" s_answer = "" for child in post["children"]: if child["type"] == "i_answer": i_answer = child["history"][0]["content"] if child["type"] == "s_answer": s_answer = child["history"][0]["content"] yield { "_op_type": "index", "_index": self.es_index_name, "_id": post["nr"], "_source": { "subject": latest_state["subject"], "content": latest_state["content"], "i_answer": i_answer, "s_answer": s_answer, "responded": trigger } } def recommend(self, debug=True): # default to true to not accidentally post for post in self.jobs: response = "!@#$\n" # response = str(recommend_with_mlt(search, post)) + "\n" # print(int(post["nr"])-1, len(corpus)) # # response += "Topic of this post: " + str(lda_model[corpus[int(post["nr"])-2]]) topic = self.topic_of(self.nr2index[post["nr"]]) response += "Topic of this post: " + str(topic) + "\n" # response += "Topic contents " + str(self.lda_model.show_topic(topic)) + "\n" response += "Contributive contents: " + str([ "(" + pair[0] + ", " + str(pair[1])[:4] + ")" for pair in self.lda_model.show_topic(topic) ]) + "\n" # # #may be useful for debugging # # response += str([id2word[id_[0]] for id_ in corpus[int(post["nr"])-2]]) # response += "Post number: " + str(post["nr"]) + "\n" # response += "Post content: " + post["history"][0]["content"] + "\n" # response += str([self.id2word[id_[0]] for id_ in self.corpus[self.nr2index[post["nr"]]]]) # response += "\n\n" # # response += str(self.get_posts_with_same_best_topic(post["nr"], topic)) + "\n" response += "Posts with same topic: " + str( self.get_posts_with_same_topic(post["nr"], topic)) if not debug: result = network.create_followup( post["children"][1], response) # both nr and entire post works else: print("#### Start Post ####") print(response) print("#### End Post ####") print("train_iter, num_topics: ", self.train_iter, self.num_topics) # deprecated and bad def get_posts_with_same_best_topic(self, post_number, target_topic, num_docs=3): # do one up one down docs = [] looked = 0
# (the deprecated neighbor scan continues; get_posts_with_same_topic ranks
# posts by Euclidean distance between LDA topic vectors; recommend_with_mlt
# builds an Elasticsearch more_like_this query over subject/content/answers.
# NOTE(review): the loop condition `len(docs) < 3 or looked > 100` looks
# inverted — presumably `and looked < 100`; confirm against the original.)
while len(docs) < 3 or looked > 100: looked += 1 nrs = [post_number + looked, post_number - looked] for nr in nrs: if nr in self.nr2index and nr != -1 and self.topic_of( self.nr2index[nr]) == target_topic: docs.append("@" + str(nr)) return docs[:num_docs] # extra atrocious def get_posts_with_same_topic(self, number, target_topic, num_docs=3): self.get_topic_vectors() vector = self.topic_vectors[self.nr2index[number]] min_dists = [float("inf") for i in range(num_docs + 1)] min_indices = [0 for i in range(num_docs + 1)] for i in range(len(self.topic_vectors)): distance = np.sqrt(np.sum((vector - self.topic_vectors[i])**2, 0)) if distance < max(min_dists): i_md = min_dists.index(max(min_dists)) print(i, distance) min_dists[i_md] = distance min_indices[i_md] = i posts = ["@" + str(self.index2nr[i]) for i in min_indices] posts.remove("@" + str(number)) # no need to recommend the same post return posts def topic_of(self, document): topics = [ topic for index, topic in self.lda_model[self.corpus[document]] ] return topics.index(max(topics)) def get_topic_vectors(self): docs_topics = [] for i in range(len(self.corpus)): doc_topics = self.lda_model.get_document_topics(self.corpus[i]) if len(doc_topics) == self.num_topics: docs_topics.append( np.array([component[1] for component in doc_topics])) else: topics = [] d_pop = doc_topics.pop(-1) for i in range(self.num_topics - 1, -1, -1): if i != d_pop[0] or len(doc_topics) == 0: topics.append(0) else: topics.append(d_pop[1]) d_pop = doc_topics.pop(-1) topics.reverse() docs_topics.append(np.array(topics)) self.topic_vectors = docs_topics def recommend_with_mlt(self, post, score_limit=0): latest_state = post["history"][0] # Can do post likelyhood later search_text = latest_state["subject"] + latest_state["content"] # maybe static this later mlt_query = { "more_like_this": { "fields": ["subject", "content", "i_answer", "s_answer"], "like": search_text, "min_term_freq": 1, "min_doc_freq": 1, "max_query_terms": 50, # "term_vector":
# (tail of the more_like_this query dict, plus change_network.
# NOTE(review): change_network assigns the bare global `network_name` to
# self.es_index_name instead of the `network_id`-derived name or
# self.network_name — likely a bug; also self.s keeps the OLD index.
# Confirm against the original source.)
"with_positions_offsets" } } recommendation = [] docs = s.query(mlt_query).execute() for doc in docs: if int(doc.meta["id"]) != int( post["nr"]) and doc.meta["score"] > score_limit: recommendation.append((doc.meta["id"], doc.meta["score"])) return recommendation def change_network(self, network_id): self.network = self.p.network(network_id) self.s = Search(using=self.es, index=self.network_name) self.es_index_name = network_name
# NOTE(review): Python 2 fragment (print statement) cut at both ends — the
# enclosing function/try-block and the tail of the download loop are
# outside this chunk, so it is preserved verbatim. It logs an error and
# exits, parses a topics/course/last-cid config, regenerates the queries
# file, then downloads up to 50 posts into transit.dat, skipping pinned
# posts and stopping at the previously seen cid.
print info f2.write(time.strftime("%c") + " - " + info + "\n\n") sys.exit() # separating topics, fixed. topics = [x.strip() for x in data[0].split(',')] ## Generate Queries File generatequeries(topics) target_course = str(data[1].strip()) last_cid = int(data[2].strip()) f2.close() f1.close() course = p.network(target_course) result = [] with open("./postsdataset/transit.dat", "w") as f: max_cid = 0 # get limit+1 posts. E.g. limit=10 will only get you 9 posts posts = course.iter_all_posts(limit=51) for post in posts: if "#pin" in str(post["history"][0]): continue cid = str(post["nr"]) max_cid = max(max_cid, int(cid)) if int(cid) == last_cid: break
from piazza_api import Piazza

# Python 2 snippet (print statement): fetch post #29 from one course and
# print its latest body.
# SECURITY NOTE(review): a real-looking password is hard-coded below —
# rotate it and load credentials from config/env instead of source.
p=Piazza()
p.user_login('*****@*****.**','Cxy3020840!')
eece210 = p.network("ieby1xzit8r1ki")
post=eece210.get_post(29)
# history[0] is the latest revision of the post.
s=post['history'][0]
print s['content']
import json
from time import sleep
from piazza_api import Piazza

import config

# Credentials and the network id of the CIS 121 classroom.
username = config.username
password = config.password
class_id = 'is0q8vy2dsm6yx'
p = Piazza()
p.user_login(email=username, password=password)
cis121 = p.network(class_id)

# get the total number of posts
stats = cis121.get_statistics()
# get it using the daily posts in the statistics page
total_num_posts = sum(d['questions'] for d in stats['daily'])

# now load the "class database"; a missing file or an empty/corrupt one
# both fall back to an empty dict.
try:
    with open(config.LOCAL_DATA_FILE_NAME, 'r') as data_file:
        try:
            current_data = json.load(data_file)
        except ValueError:
            # empty file
            current_data = {}
except IOError:
    # file didn't exist
    current_data = {}

counter = 0
limit = 10
# NOTE(review): Flask-RESTful `get` handler for course lookups. Operations:
# 'get' (name + piazza cid), 'getname' and 'getpiazza' (both marked
# deprecated), and 'getpiazzaposts' (reads Piazza credentials from
# USER_FILE, validates them by pulling one post, then streams every post
# as JSON). This chunk is split mid-string-literal ("Unable to find piazza
# credentials") across the next line, so it is preserved verbatim.
# Typo to confirm upstream: 'Invalid pizza credentials' -> 'piazza'.
def get(self, course_id, operation): if operation == 'get': cursor = g.db.execute('''SELECT course_name, piazza_cid FROM courses WHERE course_id=(?)''', [int(course_id)]) row = cursor.fetchone() if row is None: raise InvalidUsage('Given course does not exist') else: course_name_str = row['course_name'] piazza_id_str = row['piazza_cid'] return jsonify(message='Returning course info', course_id=course_id, course_name=course_name_str, piazza_cid=piazza_id_str) # @deprecated: use 'get' end point instead if operation == 'getname': cursor = g.db.execute('''SELECT course_name FROM courses WHERE course_id=(?)''', [int(course_id)]) course_name_row = cursor.fetchone() if course_name_row is None: raise InvalidUsage('Given course does not have a name') else: course_name_str = course_name_row['course_name'] return jsonify(message='Returning name for course', course_id=course_id, course_name=course_name_str) # @deprecated: use 'get' end point instead elif operation == 'getpiazza': cursor = g.db.execute('''SELECT piazza_cid FROM courses WHERE course_id=(?)''', [int(course_id)]) piazza_id_row = cursor.fetchone() if piazza_id_row is None: raise InvalidUsage('Given course does not have a Piazza ID') else: piazza_id_str = piazza_id_row['piazza_cid'] return jsonify(message='Returning piazza ID for course', course_id=course_id, piazza_cid=piazza_id_str) elif operation == 'getpiazzaposts': cursor = g.db.execute('''SELECT piazza_cid FROM courses WHERE course_id=(?)''', [int(course_id)]) piazza_id_row = cursor.fetchone() if piazza_id_row is None: raise InvalidUsage('Given course does not have a Piazza ID') else: piazza_id_str = piazza_id_row['piazza_cid'] p = Piazza() try: with open(USER_FILE, 'r') as fname: lines = fname.read().split('\n') p.user_login(email=lines[0], password=lines[1]) piazza_class = p.network(piazza_id_str) except IndexError: raise InvalidUsage('Piazza credentials are improperly formatted', status_code=501) except IOError: raise InvalidUsage('Unable to find piazza 
credentials', status_code=500) except AuthenticationError: raise InvalidUsage('Invalid pizza credentials') # Attempt to pull a single post. If it doesn't work, we should # throw an error try: for k in piazza_class.iter_all_posts(limit=1): single_post = k except RequestError: raise InvalidUsage('Invalid piazza course ID', status_code=500) def get_posts(): for post in piazza_class.iter_all_posts(): yield json.dumps(post) return Response(get_posts(), mimetype='application/json') else: raise InvalidUsage('Unknown operation type')
from webargs.flaskparser import use_kwargs, parser
from flask_pymongo import PyMongo
import re
import requests
import json
import random

app = Flask(__name__)
# SECURITY NOTE(review): the MongoDB URI embeds credentials in source —
# move it to an environment variable / secret store and rotate the password.
app.config[
    "MONGO_URI"] = "mongodb+srv://aws-lambda:[email protected]/test?retryWrites=true&w=majority"
api = Api(app)
mongo = PyMongo(app)
# Module-level Piazza session. NOTE(review): the name `get` is easy to
# confuse with an HTTP verb handler; consider renaming when refactoring.
get = Piazza()
# SECURITY NOTE(review): hard-coded Piazza password — load from config.
get.user_login("*****@*****.**", "hackgtdummy")
cs101 = get.network("k26wh1bxb6imp")
BASE_AMAZON_URI = "https://api.amazonalexa.com/v1/"
# SECURITY NOTE(review): Alexa client id/secret committed in source —
# these should live in configuration and be rotated.
CLIENT_ID = "amzn1.application-oa2-client.efe9993510944061af51f4e0980f3856"
CLIENT_SECRET = "38d0847f8a78c5fb7ddbeefd569f6a2cdb1a5a0852f22b880602ed1cd2f5882f"


def cleanhtml(raw_html):
    """Strip HTML tags from a string using a non-greedy tag regex."""
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', raw_html)
    return cleantext

# GET: Get all posts on piazza room #
import pandas as pd
import json
import numpy as np
from piazza_api import Piazza

# Load credentials and the network id from config.json via a pandas Series.
df_user = pd.read_json('config.json', typ="series");
p = Piazza()
p.user_login(df_user["user"], df_user["pass"])
cogs108 = p.network(df_user["network"])
posts = cogs108.iter_all_posts(limit=None)
dict_posts = []
follow_ups = []
# Collect the latest body and the follow-up count of every non-private post.
for post in posts:
    #if you would like to pretty print the data
    #the built in program already anonymizes all user names to a default: "no"
    #print(json.dumps(post, indent=4, sort_keys=True))
    if post['status'] != 'private':
        dict_post1 = {'content': post['history'][0]['content']}
        dict_posts.append(dict_post1)
        follow_ups.append(len(post['children']))
data = {
    "posts": dict_posts,
    "follow-ups": follow_ups
}
# NOTE(review): chunk is truncated here — the body of this with-block
# (presumably json.dump(data, outfile)) lies outside this view.
with open('posts.json', 'w') as outfile:
class Scraper:
    """Scrapes Piazza posts, comments, tags and daily statistics into a
    local SQLite database.

    Usage:
    >>> s = Scraper()       # Initializes scraper/db connection
    >>> s.parse()           # Refreshes stats and re-fetches recent posts
    >>> s.print_posts()     # Prints all posts in DB
    >>> s.print_tags()      # Prints list of current topics/tags
    """

    def __init__(self, days_refresh=10):
        # Log into Piazza and bind the configured course network.
        self.piazza = Piazza()
        self.piazza.user_login(email=Config.username, password=Config.password)
        self.course = self.piazza.network(Config.courseid)
        # Local SQLite store for posts/comments/tags/stats.
        self.engine = create_engine('sqlite:///test.db', echo=False)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()
        # Posts newer than this many days are deleted and re-fetched.
        self.days = days_refresh

    def parse(self):
        """Refresh daily statistics, then drop and re-fetch recent posts."""
        self.refetch_stats()
        print("Finished getting stats")
        self.delete_recent()
        self.get_recent_posts()

    def get_recent_posts(self):
        """
        Starts the scraper and stores posts in the database; this is the
        primary method used when scraping.

        Iterates newest-first and stops at the first already-stored
        (non-pinned) post, so previously scraped history is not duplicated.

        Returns: None
        """
        for idx, post in enumerate(self.course.iter_all_posts()):
            if not self.process_one(post):
                return
            print(idx, post['history'][0]['subject'])

    def process_one(self, post):
        """Convert one Piazza API post into database rows and store it.

        Returns True if scraping should continue, False if it should stop
        (the first non-pinned duplicate means everything older is already
        in the database).
        """
        time = parse_time(post['created'])
        duplicate = self.session.query(Post).filter(Post.time == time).first()
        title = remove_tags(post['history'][0]['subject'])
        body = remove_tags(post['history'][0]['content'])
        views = post['unique_views']
        favorites = post['num_favorites']
        if duplicate is not None:
            # Pinned posts float to the top of the feed regardless of age, so
            # a pinned duplicate does not imply the rest is already scraped.
            if post['bucket_name'] == 'Pinned':
                return True
            return False
        sqlpost = Post(title, body, time, views, favorites)
        # Adding comments (answers and follow-ups)
        for comment in process_all_children(post):
            time = parse_time(comment['created'])
            # renamed from `type` to avoid shadowing the builtin
            comment_type = comment['type']
            if 'history' not in comment:
                subject = remove_tags(comment['subject'])
            else:
                subject = remove_tags(comment['history'][0]['content'])
            sqlpost.comments.append(Comment(subject, time, comment_type))
        # Adding Tags
        for tag in post['tags']:
            sqlpost.tags.append(self.get_tag(tag))
        # Saving to Database
        self.session.add(sqlpost)
        self.session.commit()
        return True

    def get_tag(self, name):
        """Return the Tag row with this name, creating it if it doesn't exist."""
        tag = self.session.query(Tag).filter(Tag.name == name).first()
        if not tag:
            tag = Tag(name)
            self.session.add(tag)
            self.session.commit()
        return tag

    def print_posts(self):
        """Print every post currently in the database, with its comments."""
        posts = self.session.query(Post).all()
        for post in posts:
            print(post)
            for comment in post.children:
                print("\t", comment)

    def print_tags(self):
        """Print every tag/topic currently registered."""
        topics = self.session.query(Tag).all()
        for topic in topics:
            print(topic)

    def delete_all(self):
        """Delete all posts, comments and tags (caller must commit)."""
        self.session.query(Post).delete()
        self.session.query(Comment).delete()
        self.session.query(Tag).delete()

    def delete_recent(self):
        """Delete posts newer than the refresh window so they are re-fetched."""
        recent = datetime.now() - timedelta(days=self.days)
        self.session.query(Post).filter(Post.time > recent).delete()

    def refetch_stats(self):
        """Replace stored daily statistics if they are missing or > 1 day old.

        Bug fix: the original queried `s.session` (an accidental global)
        instead of `self.session`, raising NameError unless a module-level
        `s` happened to exist.
        """
        mostrecent = self.session.query(DayStats).order_by(DayStats.day.desc()).first()
        # Single condition avoids referencing a possibly-undefined `delta`.
        if mostrecent is None or datetime.now() - mostrecent.day > timedelta(days=1):
            stats = self.course.get_statistics()
            self.session.query(DayStats).delete()
            for daily in stats['daily']:
                day = parse_day(daily['day'])
                self.session.add(DayStats(daily['questions'], day, daily['users'], daily['posts']))
            self.session.commit()
            print("Finished updating statistics")
# NOTE(review): this `def` appears to be the tail of an HTML-stripping parser
# class (MLStripper is instantiated below); the class header is outside this
# excerpt, so its indentation context cannot be confirmed here.
def get_data(self):
    # `self.fed` presumably accumulates text chunks collected during feed() —
    # TODO confirm against the class definition above this excerpt.
    return ''.join(self.fed)

def strip_tags(html):
    """Return `html` with all markup removed via the MLStripper parser."""
    s = MLStripper()
    s.feed(html)
    return s.get_data()

p = Piazza()
# NOTE(review): hard-coded credentials — move to config/env.
p.user_login("*****@*****.**", "59170864aS@") #TODO UPON ASKING FOR USER CREDENTIALS, LOGIN TO THEIR PIAZZA ACCOUNT
stat200101 = p.network("jz1p1pgbr9e15c")
classes = p.get_user_classes()
print(classes)
print(stat200101)

# this gets the contents of a post: the question
tempPost = stat200101.get_post("361")
tempPostContent = tempPost["history"][0]["content"]
strippedQuestion = strip_tags(tempPostContent)
# children[0] is assumed to be the answer — TODO confirm; children can also
# be follow-up discussions, not only answers.
tempPostAns = tempPost["children"][0]["history"][0]["content"]
strippedAns = strip_tags(tempPostAns)

# go through all the posts, aggregate them in a datastructure
# store that in for easy access
postQuestions = []
postAnswers = []
# Dump the content of every post in a Piazza course (course id given as the
# first CLI argument) into posts183.json, keyed by post number.
from piazza_api import Piazza
import json
import sys

p = Piazza()
p.user_login()  # no arguments: prompts for credentials interactively
course = p.network(sys.argv[1])

mapSave = {}
# Huge limit effectively means "all posts".
posts = course.iter_all_posts(limit=100000000000)
for post in posts:
    content = post["history"][0]["content"]  # latest revision's body
    post_id = post["nr"]  # renamed from `id` to avoid shadowing the builtin
    print(post_id)
    mapSave[post_id] = content

# Bug fix: json.dumps returns str, so the file must be opened in text mode —
# the original "wb" raises TypeError on Python 3 (and "w" is equivalent on 2).
with open("posts183.json", "w") as f:
    f.write(json.dumps(mapSave))
import bs4 # In[2]: p = Piazza() # In[5]: p.user_login() # In[6]: spark = p.network("i9esrcg0gpf8k") # In[46]: out ="" failed= [] for i in range(1,spark.get_statistics()["total"]["questions"]): try : post = spark.get_post(i) text = bs4.BeautifulSoup(post["history"][0]["content"]).getText() out = out +" "+ text except Exception, e: failed.append(i)
def get_all_online_data(piazza_class): # Appends this string to the class name when storing the data export_file_suffix = "_posts.csv" # Read credentials with open(config_file) as f: content = f.readlines() creds = [x.strip() for x in content] username = creds[0] password = creds[1] p = Piazza() p.user_login(username, password) class_122_s17 = p.network("ix087c2ns5p656") class_122_f15 = p.network("idt0ymj51qr5do") class_122_f16 = p.network("irz1akgnpve6eo") class_381 = p.network("ixz5scp9zqi583") class_601 = p.network("ixs4v2xr1cz10d") ### Change this to download a different class's data class_to_download = None if piazza_class == "122_s17": class_to_download = class_122_s17 elif piazza_class == "122_f15": class_to_download = class_122_f15 elif piazza_class == "122_f16": class_to_download = class_122_f16 elif piazza_class == "381": class_to_download = class_381 elif piazza_class == "601": class_to_download = class_601 else: raise ValueError("Invalid class name") export_file = piazza_class + export_file_suffix ### # Calling with no argument gets all posts all_responses = class_to_download.iter_all_posts() # all_responses = [class_to_download.get_post(2806)] print "Finished getting all posts" out_file = open(export_file, "wb") csv_writer = csv.writer(out_file) # Write column headers of feature names csv_writer.writerow(TITLE_FIELDS) count = 0 for response in all_responses: # for r in response: # print r, response[r] fields_dict = get_relevant_fields(response) # print fields_dict["tags"] if fields_dict == None: continue values = [fields_dict[k] for k in TITLE_FIELDS] line = ','.join(values) try: csv_writer.writerow(values) except Exception as e: print "Failed on id=", fields_dict["cid"] print(e) exit(1) if count % 200 == 0: print count count += 1 out_file.close()
class PiazzaHandler:
    """
    Handles requests to a specific Piazza network. Requires an e-mail and password, but if none are
    provided, then they will be asked for in the console (doesn't work for Heroku deploys).

    API is rate-limited (max is 55 posts in about 2 minutes?) so it's recommended to be conservative
    with fetch_max, fetch_min and only change them if necessary.

    All `fetch_*` functions return JSON directly from Piazza's API and all `get_*` functions
    parse that JSON.

    Attributes
    ----------
    name : `str`
        Name of class (ex. CPSC221)
    nid : `str`
        ID of Piazza forum (usually found at the end of a Piazza's home url)
    email : `str (optional)`
        Piazza log-in email
    password : `str (optional)`
        Piazza password
    guild : `discord.Guild`
        Guild assigned to the handler
    fetch_max : `int (optional)`
        Upper limit on posts fetched from Piazza.
    fetch_min: `int (optional)`
        Lower limit on posts fetched from Piazza. Used as the default value for functions that
        don't need to fetch a lot of posts
    url : `str`
        Base URL of the Piazza course page, derived from nid
    channels : `List[int]`
        Discord channel ids registered with this handler
    """

    def __init__(self, name: str, nid: str, email: str, password: str, guild: discord.Guild, fetch_max: int = 55, fetch_min: int = 30):
        self._name = name
        self.nid = nid
        self._guild = guild
        self._channels = []
        self.url = f"https://piazza.com/class/{self.nid}"
        # Logging in happens here, at construction time (network side effect).
        self.p = Piazza()
        self.p.user_login(email=email, password=password)
        self.network = self.p.network(self.nid)
        self.fetch_max = fetch_max
        self.fetch_min = fetch_min

    @property
    def piazza_url(self) -> str:
        return self.url

    @piazza_url.setter
    def piazza_url(self, url: str) -> None:
        self.url = url

    @property
    def course_name(self) -> str:
        return self._name

    @course_name.setter
    def course_name(self, name: str) -> None:
        self._name = name

    @property
    def piazza_id(self) -> str:
        return self.nid

    @piazza_id.setter
    def piazza_id(self, nid: str) -> None:
        self.nid = nid

    @property
    def guild(self) -> discord.Guild:
        return self._guild

    @guild.setter
    def guild(self, guild: discord.Guild) -> None:
        self._guild = guild

    @property
    def channels(self) -> List[int]:
        return self._channels

    @channels.setter
    def channels(self, channels: List[int]) -> None:
        self._channels = channels

    def add_channel(self, channel: int) -> None:
        # No-op if the channel is already registered.
        if channel not in self.channels:
            self._channels.append(channel)

    def remove_channel(self, channel: int) -> None:
        # No-op if the channel is not registered.
        if channel in self.channels:
            self._channels.remove(channel)

    def fetch_post_instance(self, post_id: int) -> dict:
        """
        Returns a JSON object representing the Piazza post with ID `post_id`.

        Raises InvalidPostID if the post doesn't exist or is private.

        Parameters
        ----------
        post_id : `int`
            requested post ID
        """
        try:
            post = self.network.get_post(post_id)
        except piazza_api.exceptions.RequestError as ex:
            raise InvalidPostID("Post not found.") from ex
        if self.check_if_private(post):
            raise InvalidPostID("Post is Private.")
        return post

    async def fetch_recent_notes(self, lim: int = 55) -> List[dict]:
        """
        Returns up to `lim` JSON objects representing instructor's notes that were posted today

        Parameters
        ----------
        lim : `int (optional)`
            Upper limit on posts fetched. Must be in range [fetch_min, fetch_max] (inclusive)
        """
        posts = await self.fetch_posts_in_range(days=0, seconds=60 * 60 * 5, lim=lim)
        response = []
        for post in posts:
            # NOTE(review): tags[0] assumes the first tag marks instructor
            # notes and that tags is non-empty — confirm against Piazza's API.
            if post["tags"][0] == "instructor-note" or post[
                    "bucket_name"] == "Pinned":
                response.append(post)
        return response

    def fetch_pinned(self, lim: int = 0) -> List[dict]:
        """
        Returns up to `lim` JSON objects representing pinned posts\n
        Since pinned posts are always the first notes shown in a Piazza, lim can be a small value.

        Parameters
        ----------
        lim : `int`
            Upper limit on posts fetched. Must be in range [fetch_min, fetch_max] (inclusive)
        """
        # lim == 0 (falsy) falls back to fetch_min.
        posts = self.network.iter_all_posts(limit=lim or self.fetch_min)
        response = []
        for post in posts:
            if self.check_if_private(post):
                continue
            if post["bucket_name"] and post["bucket_name"] == "Pinned":
                response.append(post)
        return response

    async def fetch_posts_in_range(self, days: int = 1, seconds: int = 0, lim: int = 55) -> List[dict]:
        """
        Returns up to `lim` JSON objects that represent a Piazza post posted today
        """
        if lim < 0:
            raise ValueError(f"Invalid lim for fetch_posts_in_days(): {lim}")
        posts = []
        feed = self.network.get_feed(limit=lim, offset=0)
        for cid in map(itemgetter("id"), feed["feed"]):
            post = None
            # Up to 5 attempts per post; back off only on rate-limit errors.
            retries = 5
            while not post and retries:
                try:
                    post = self.network.get_post(cid)
                except piazza_api.exceptions.RequestError as ex:
                    retries -= 1
                    # NOTE(review): "foo fast" looks like a placeholder — the
                    # Piazza rate-limit message is along the lines of "posting
                    # too quickly"/"too fast"; confirm the real substring, or
                    # every rate-limited post silently loses its retries here.
                    if "foo fast" in str(ex):
                        await asyncio.sleep(1)
                    else:
                        break
            if post:
                posts.append(post)
        date = datetime.date.today()
        result = []
        for post in posts:
            # [2020,9,19] from 2020-09-19T22:41:52Z
            created_at = [int(x) for x in post["created"][:10].split("-")]
            created_at = datetime.date(created_at[0], created_at[1], created_at[2])
            if self.check_if_private(post):
                continue
            # NOTE(review): subtracting two `date`s yields a timedelta whose
            # .seconds is always 0, so the seconds clause is always true and
            # only whole days are actually filtered — confirm intent.
            elif (date - created_at).days <= days and (
                    date - created_at).seconds <= seconds:
                result.append(post)
        return result

    def get_pinned(self) -> List[dict]:
        """
        Returns an array of objects (up to `fetch_min` by default) containing a
        pinned post's post id, title, and url.
        """
        posts = self.fetch_pinned()
        response = []
        for post in posts:
            post_details = {
                "num": post["nr"],
                "subject": post["history"][0]["subject"],
                "url": f"{self.url}?cid={post['nr']}",
            }
            response.append(post_details)
        return response

    def get_post(self, post_id: int) -> typing.Union[dict, None]:
        """
        Returns a dict that contains post information to be formatted and returned as an embed

        Parameters
        ----------
        post_id : `int`
            int associated with a Piazza post ID
        """
        post = self.fetch_post_instance(post_id)
        if post:
            post_type = "Note" if post["type"] == "note" else "Question"
            response = {
                "subject": self.clean_response(post["history"][0]["subject"]),
                "num": f"@{post_id}",
                "url": f"{self.url}?cid={post_id}",
                "post_type": post_type,
                "post_body": self.clean_response(self.get_body(post)),
                "i_answer": None,
                "s_answer": None,
                "num_followups": 0
            }
            answers = post["children"]
            if answers:
                num_followups = 0
                for answer in answers:
                    # i_answer/s_answer = instructor/student answer; everything
                    # else is a follow-up thread, counted recursively.
                    if answer["type"] == "i_answer":
                        response["i_answer"] = self.clean_response(
                            self.get_body(answer))
                    elif answer["type"] == "s_answer":
                        response["s_answer"] = self.clean_response(
                            self.get_body(answer))
                    else:
                        num_followups += self.get_num_follow_ups(answer)
                response.update({"num_followups": num_followups})
            response.update({"tags": ", ".join(post["tags"] or "None")})
            return response
        else:
            return None

    def get_num_follow_ups(self, answer: dict) -> int:
        # Counts this node plus all nested children, recursively.
        return 1 + sum(self.get_num_follow_ups(i) for i in answer["children"])

    async def get_posts_in_range(self, show_limit: int = 10, days: int = 1, seconds: int = 0) -> List[List[dict]]:
        """Return [instructor_posts, student_posts] summaries for the range;
        student posts are capped at `show_limit` when there are many."""
        if show_limit < 1:
            raise ValueError(
                f"Invalid showLimit for get_posts_in_range(): {show_limit}")
        posts = await self.fetch_posts_in_range(days=days, seconds=seconds, lim=self.fetch_max)
        instr, stud = [], []
        response = []

        def create_post_dict(post: dict, tag: str) -> dict:
            # Summary record used by both instructor and student lists.
            return {
                "type": tag,
                "num": post["nr"],
                "subject": self.clean_response(post["history"][0]["subject"]),
                "url": f"{self.url}?cid={post['nr']}"
            }

        def filter_tag(post: dict, arr: List[dict], tagged: str) -> None:
            """Sorts posts by instructor or student and append it to the respective array of posts"""
            for tag in post["tags"]:
                if tag == tagged:
                    arr.append(create_post_dict(post, tag))
                    break

        # first adds all instructor notes to update, then student notes
        # for student notes, show first 10 and indicate there's more to be seen for today
        for post in posts:
            filter_tag(post, instr, "instructor-note")
        if len(posts) - len(instr) <= show_limit:
            for p in posts:
                filter_tag(p, stud, "student")
        else:
            # NOTE(review): show_limit + 1 scans one extra post — confirm
            # whether the off-by-one is intentional ("more to be seen" marker).
            for i in range(show_limit + 1):
                filter_tag(posts[i], stud, "student")
        response.append(instr)
        response.append(stud)
        return response

    async def get_recent_notes(self) -> List[dict]:
        """
        Fetches `fetch_min` posts, filters out non-important (not instructor notes or pinned) posts
        and returns an array of corresponding post details
        """
        posts = await self.fetch_recent_notes(lim=self.fetch_min)
        response = []
        for post in posts:
            post_details = {
                "num": post["nr"],
                "subject": self.clean_response(post["history"][0]["subject"]),
                "url": f"{self.url}?cid={post['nr']}"
            }
            response.append(post_details)
        return response

    def check_if_private(self, post: dict) -> bool:
        """Return True if the post's status marks it private."""
        return post["status"] == "private"

    def clean_response(self, res: str) -> str:
        """Truncate long text, strip HTML tags, unescape entities, and supply
        a placeholder when nothing textual remains."""
        if len(res) > 1024:
            res = res[:1000]
            res += "...\n\n *(Read more)*"
        tag_regex = re.compile("<.*?>")
        res = html.unescape(re.sub(tag_regex, "", res))
        if len(res) < 1:
            res += "An image or video was posted in response."
        return res

    def get_body(self, res: dict) -> str:
        """Return the latest revision's content; raise if it is empty."""
        body = res["history"][0]["content"]
        if not body:
            raise Exception("Body not found.")
        return body
# Log into Piazza, dump the user's profile to disk, then walk their enrolled
# classes and download posts for CS 103 / CS 110 into per-class directories.
p = Piazza()
# NOTE(review): hard-coded credentials — move to config/env.
p.user_login('*****@*****.**', 'thomaslau')

# Round-trip the profile through a file on disk.
f = open('userProfile.txt','w')
json.dump(p.get_user_profile(), f)
f.close()
rawUserData = open('userProfile.txt')
jsonUserData = json.load(rawUserData)
rawUserData.close()

masterPath = os.getcwd()
for i in jsonUserData["all_classes"]:  # i is a class/network id
    classConnection = p.network(i)
    # Huge limit effectively means "all posts".
    posts = classConnection.iter_all_posts(limit=100000)
    className = jsonUserData["all_classes"][i]["num"]
    # Only these two courses are processed; `x = 0` is a placeholder no-op.
    if className == "CS 103":
        x = 0
    elif className == "CS 110":
        x = 0
    else:
        continue
    # reset to the relative root
    os.chdir(masterPath)
    if not os.path.exists(masterPath + "/" + className):
        os.mkdir(masterPath + "/" + className)
    # NOTE(review): the loop body continues past the end of this excerpt.