class PiazzaScraper:
    """Convenience wrapper around a single Piazza class (network).

    ``class_code`` is the last path segment of the class URL, e.g. the
    ``k12qbt838di4xt`` in ``https://piazza.com/class/k12qbt838di4xt``.
    """

    def __init__(self, class_code, username, password):
        """Log in with ``username``/``password`` and bind to ``class_code``."""
        self.piazza = Piazza()
        self.piazza.user_login(username, password)
        self.piazza_class = self.piazza.network(class_code)

    def get_all_posts(self):
        """Return one thread dictionary per entry of the class feed.

        The feed is requested with a very large limit so that every thread id
        is collected first; each thread is then fetched individually.
        """
        listing = self.piazza_class.get_feed(limit=999999, offset=0)
        wanted_ids = [entry['id'] for entry in listing["feed"]]
        return list(map(self.piazza_class.get_post, wanted_ids))

    def get_post(self, thread_id):
        """Return the dictionary describing the single thread ``thread_id``."""
        thread = self.piazza_class.get_post(thread_id)
        return thread

    def get_users(self):
        """Return the profiles of all users of the class."""
        everyone = self.piazza_class.get_all_users()
        return everyone

    def get_user(self, user_id):
        """Return whatever the network's ``get_users`` yields for ``user_id``."""
        profile = self.piazza_class.get_users(user_id)
        return profile
Example #2
0
def checkPiazza():
    """Forward Piazza posts whose subject is not yet recorded in MongoDB.

    Reads every document of the ``posts`` collection in the ``piazza-posts``
    database and collects their ``ID`` fields.  It then walks all posts of the
    configured Piazza network; each post whose first history entry has a
    subject that is not among the stored IDs is recorded as
    ``{'ID': subject}`` and handed to ``sendPayload``.

    Returns:
        The string ``"success"``.

    Raises:
        KeyError: if a stored document has no ``ID`` field.
    """
    client = pymongo.MongoClient(pizzapizzasecret.dbsecret)
    db = client.get_database('piazza-posts')
    table = db.posts
    query = table.find()
    print(query)
    existingPosts = [document['ID'] for document in query]
    print(existingPosts)
    # Membership is tested once per Piazza post, so use a set for O(1)
    # lookups; the list above is kept only so the printed output is unchanged.
    knownSubjects = set(existingPosts)

    p = Piazza()
    p.user_login(pizzapizzasecret.email, pizzapizzasecret.password)
    ds = p.network(pizzapizzasecret.net)
    for post in ds.iter_all_posts():
        payload = post['history'][0]['subject']
        if payload in knownSubjects:
            print("piazza channel up to date")
            continue
        print(payload)
        table.insert_one({'ID': payload})
        sendPayload(post)
    return "success"
Example #3
0
def create_piazza_post(title: str, content: str, folder: str) -> int:
    """Publish a post built from the arguments and return its CID.

    Credentials and the network id come from the global ``Config``.  When
    Piazza rejects the request with a "posting too quickly" error the call is
    retried after a short pause; any other request error propagates.
    """
    email = Config.get_global().piazza_email
    secret = Config.get_global().piazza_password
    network_id = Config.get_global().piazza_id

    client = Piazza()
    client.user_login(email, secret)
    network = client.network(network_id)

    while True:
        try:
            result = network.create_post(
                title=title,
                content=content,
                nid=network_id,
                folder=folder
            )
        except piazza_api.exceptions.RequestError as err:
            # Piazza rate-limits posting with the message
            # "Sorry! It looks like you are posting too quickly--wait a second and try again."
            if "posting too quickly" not in str(err):
                # Anything else is not ours to handle.
                raise err
            time.sleep(0.4)
        else:
            return int(result['nr'])
Example #4
0
def post_aggregate(email, password, coursecode):
    """Return the tag-stripped question text of the posts in ``coursecode``.

    ``coursecode`` is translated to a network hash through ``course_hash``;
    the posts are then read lazily (at most ``const_limit`` of them) and the
    content of each post's first history entry is cleaned with ``strip_tags``.
    """
    session = Piazza()
    session.user_login(email, password)

    # Mapping from course code to the network hash.
    hash_by_course = course_hash(email, password)
    board = session.network(hash_by_course[coursecode])

    # iter_all_posts yields posts one at a time, so consume it directly.
    return [
        strip_tags(entry["history"][0]["content"])
        for entry in board.iter_all_posts(limit=const_limit)
    ]
Example #5
0
    def perform_action(action,
                       course,
                       as_staff=False,
                       is_test=None,
                       kwargs=None):
        """Run the network method named ``action`` for ``course``.

        The account (staff or student) and the Piazza course id (test or
        real) are looked up in the ``piazza_config`` table.  ``kwargs`` is
        passed through to the method as keyword arguments.

        Returns the method's result, or ``(message, 400)`` when looking up or
        calling the method raises.
        """
        if as_staff:
            credentials_sql = "SELECT staff_user, staff_pw FROM piazza_config WHERE course = (%s)"
        else:
            credentials_sql = "SELECT student_user, student_pw FROM piazza_config WHERE course = (%s)"

        if is_test:
            course_sql = "SELECT test_course_id FROM piazza_config WHERE course = (%s)"
        else:
            course_sql = "SELECT course_id FROM piazza_config WHERE course = (%s)"

        with connect_db() as db:
            user, pw = db(credentials_sql, [course]).fetchone()
            (course_id, ) = db(course_sql, [course]).fetchone()

        client = Piazza()
        client.user_login(user, pw)
        network = client.network(course_id)
        call_kwargs = {} if kwargs is None else kwargs
        try:
            return getattr(network, action)(**call_kwargs)
        except Exception as e:
            return str(e), 400
Example #6
0
 def setUp(self):
     """Log in with the configured account and store the class network on ``self.cis121``."""
     client = Piazza()
     client.user_login(email=config.username, password=config.password)
     self.cis121 = client.network('idj5lp6gixc6xn')
Example #7
0
async def send_new_posts():
    """Send every feed post whose id is not yet in ``sent_ids``.

    All posts of the first network of the logged-in user are fetched and
    wrapped in ``PiazzaObject`` before anything is sent; ids of sent posts are
    appended to ``sent_ids`` and persisted with ``save_sent_ids`` at the end.
    """
    load_sent_ids()
    print('\n\nloaded')
    print(sent_ids)
    print('\n\n')

    client = Piazza()
    client.user_login("email", "pass")

    first_network_id = client.get_user_status()['networks'][0]['id']
    eecs2030 = client.network(first_network_id)

    def build(entry):
        # The feed entry only carries the id; the full post has the content.
        full_post = eecs2030.get_post(entry['id'])
        latest_update = full_post['history'][0]
        return PiazzaObject(entry['id'], latest_update['subject'],
                            latest_update['content'], full_post['created'],
                            'https://piazza.com/class?cid=' + entry['id'])

    candidates = [build(entry) for entry in eecs2030.get_feed(limit=99999)['feed']]

    print(sent_ids)
    for candidate in candidates:
        if candidate.id in sent_ids:
            continue
        print(f'sending {candidate.id}')
        await send_post(candidate)
        sent_ids.append(candidate.id)

    save_sent_ids()
def main(argv=sys.argv[1:]):
    """Run the Keybase Piazza bot until its event loop completes.

    ``argv`` is parsed only for validation (no options are currently
    defined).  The Piazza login prompts for credentials interactively.
    """
    parser = argparse.ArgumentParser()
    # parser.add_argument('--username', type=str,
    #         default='PiazzaBot', help='the bot\'s Keybase username')
    parser.parse_args(argv)

    logger.info('keybase_piazza_bot starting')

    loop = asyncio.get_event_loop()

    piazza = Piazza()
    piazza.user_login()  # login prompt
    cse220 = piazza.network(PIAZZA_NETWORK_ID)

    bot = Bot(
        handler=KeybaseBotHandler(piazza, cse220),
        loop=loop,
        # username=args.username,
    )

    loop.run_until_complete(bot.start({}))
    loop.close()

    logger.info('keybase_piazza_bot exiting')
class PiazzaWrapper:
    def __init__(self, course_id='xxxxxxxxxxx'):
        self.p = Piazza()
        email_id = input("Enter your Piazza email ID : ")
        password = getpass('Piazza Password:'******'feed']:
            if post['type'] == 'question':
                # print(post['nr'])
                # print(post['content_snipet'])
                time = post['log'][0]['t']
                time = datetime.datetime.strptime(time[:-1],
                                                  "%Y-%m-%dT%H:%M:%S")
                if time.date() == today.date():
                    count += 1
                    if 'has_i' in post.keys():
                        count_i += 1
                    elif 'has_s' in post.keys():
                        count_s += 1
                    else:
                        count_unanswered += 1
                        unanswered_posts.append(post['nr'])
                    # print(time)
        return count, count_i, count_s, count_unanswered, unanswered_posts

    def get_unanswered_followup(self):
        """Sum ``traverse_content_tree`` over every post of the class.

        Each post yielded by ``iter_all_posts`` is re-fetched by its ``nr``
        before its content tree is traversed.
        """
        network = self.comp_photo19
        return sum(
            self.traverse_content_tree(network.get_post(entry['nr']))
            for entry in network.iter_all_posts()
        )

    def traverse_content_tree(self, content):
        """Recursively total the ``no_answer`` values of all descendants of ``content``.

        ``content`` itself is not counted; only nodes reachable through
        ``children`` contribute.
        """
        if 'children' not in content.keys():
            return 0
        descendants = content['children']
        if len(descendants) == 0:
            return 0

        total = 0
        for node in descendants:
            total += self.traverse_content_tree(node)
            if 'no_answer' in node.keys():
                total += node['no_answer']
        return total

    def get_count_today(self):
        """Return the five ``count_posts`` results for today over the first 100 feed entries."""
        feed = self.comp_photo19.get_feed(100, 0)
        now = datetime.datetime.today()
        total, with_i, with_s, unanswered, unanswered_nrs = self.count_posts(
            feed, now)
        return total, with_i, with_s, unanswered, unanswered_nrs
Example #10
0
def main():
    """Prompt for a class and a post, then export that post with all class users via ``generate_csv``."""
    client = Piazza()
    client.user_login()
    network = client.network(input("Enter your class id"))
    post = network.get_post(input("Enter post number"))
    members = network.get_all_users()

    generate_csv(members, post)
Example #11
0
def piazza_reader(email, password, coursecode):
    """Return all question and answer text of a course as one space-joined string.

    The course code is resolved to a network id through the user's class
    list.  For each post (at most ``const_limit``) the first history entry is
    the question; when the first child has a history, every string revision
    of it is collected as an answer.  Questions come first, then answers.
    """
    session = Piazza()
    session.user_login(email, password)

    # Map 'course code' -> 'network hash id'.
    coursetohash = {}
    for enrolled in session.get_user_classes():
        coursetohash[enrolled['num']] = enrolled['nid']

    print(coursetohash)

    board = session.network(coursetohash[coursecode])

    questions = []
    answers = []

    # iter_all_posts is lazy; it can only be walked, not indexed.
    for post in board.iter_all_posts(limit=const_limit):
        questions.append(strip_tags(post["history"][0]["content"]))

        # Skip posts without an answer-like first child.
        if ("children" not in post.keys() or not post["children"]
                or "history" not in post["children"][0]):
            continue

        for revision in post["children"][0]["history"]:
            # Only plain string revisions can be stripped.
            if type(revision["content"]) is str:
                answers.append(strip_tags(revision["content"]))

    return " ".join(questions + answers)
Example #12
0
 def _connect_to_piazza(self, piazza_credentials):
     """Log in and cache the class network, its feed and the ids of admin users.

     ``piazza_credentials`` must provide the keys ``piazza_email``,
     ``piazza_password`` and ``piazza_network``.
     """
     session = Piazza()
     session.user_login(email=piazza_credentials['piazza_email'],
                        password=piazza_credentials['piazza_password'])
     self._myclass = session.network(piazza_credentials['piazza_network'])
     # The feed is what later provides the list of cids.
     self._feed = self._myclass.get_feed(limit=999999, offset=0)
     admin_ids = []
     for member in self._myclass.get_all_users():
         if member['admin'] == True:
             admin_ids.append(member['id'])
     self._instructor_ids = admin_ids
Example #13
0
def main():
    """Parse ``--networkid``/``--output``, log in interactively and dump all posts of the network."""
    cli = argparse.ArgumentParser(description='Retrieve Piazza posts and answers in JSON format.')
    cli.add_argument('--networkid', '-n', help='piazza network ID', required=True)
    cli.add_argument('--output', '-o', help='output filename', required=True)
    options = cli.parse_args()

    client = Piazza()
    print('Login to Piazza:')
    client.user_login()

    write_all_posts(client.network(options.networkid), options.output)
Example #14
0
    def create_piazza_bot(user_email, user_password, course_code):
        """Instantiate a ``PiazzaBot`` for one course.

        Parameters:
            user_email: Piazza user email to authenticate with
            user_password: Piazza password to authenticate with
            course_code: Class/Course code on Piazza
        """
        session = Piazza()
        session.user_login(email=user_email, password=user_password)
        profile = session.get_user_profile()
        site = session.network(course_code)

        return PiazzaBot(session, profile, site)
Example #15
0
class Bot:
    """Piazza helper that reads posts through the API and writes through the RPC layer."""

    def __init__(self, course_code=config.eecs281):
        """Log in twice: once for reading the course, once for the posting RPC client."""
        self.piazza = Piazza()
        self.piazza.user_login(config.creds['email'], config.creds['password'])
        self.course = self.piazza.network(course_code)

        # rpc api to post notes
        self.piazza_rpc = PiazzaRPC(config.class_code)
        self.piazza_rpc.user_login(config.creds['email'], config.creds['password'])

    def get_all_posts_json(self):
        """Return every post as a ``Post``, using ``<class_code>.txt`` as a JSON cache.

        When the cache file is missing, all posts are downloaded and the raw
        documents are written to it; otherwise the documents are read back
        from the file without contacting Piazza.
        """
        documents = []
        posts = []
        file_name = '{0}.txt'.format(config.class_code)
        if not os.path.isfile(file_name):
            data = self.course.iter_all_posts(limit=INF)
            for post in data:
                print('downloading post {0}'.format(post['nr']))
                documents.append(post)
                posts.append(Post(post))
            # json.dump emits text, so the file must be opened in text mode;
            # the context manager guarantees the cache is flushed and closed.
            with open(file_name, 'w') as cache_file:
                json.dump(documents, cache_file)
        else:
            with open(file_name, 'r') as cache_file:
                data = json.load(cache_file)
            for post in data:
                posts.append(Post(post))
        return posts

    def get_all_posts(self, start_id=0, limit=100):
        """Download the feed posts whose number is greater than ``start_id``.

        NOTE: ``limit`` is accepted for interface compatibility but is not
        used; the default feed size of ``get_feed`` applies.
        """
        documents = []
        feed = self.course.get_feed()
        ids = [post['nr'] for post in feed['feed']]
        for post_id in ids:
            if post_id > start_id:
                print('downloading post {0}'.format(post_id))
                post_json = self.course.get_post(post_id)
                documents.append(Post(post_json))

        return documents

    def get_post(self, id):
        """Fetch a single post by number or id and wrap it in ``Post``."""
        return Post(self.course.get_post(id))

    def create_post(self, subject, body, folder=['hw1']):
        """Create a note with the given subject and body in ``folder`` (a list of folder names).

        NOTE: the default list is shared between calls; it is only read here.
        """
        params = {'type': 'note', 'subject': subject, 'content': body, 'folders': folder}
        self.piazza_rpc.content_create(params)

    def create_answer(self, post_id, content):
        """Submit ``content`` as the instructor answer of ``post_id`` and return the RPC result."""
        params = {'cid': post_id, 'type': 'i_answer', 'content': content, 'revision': 0}
        return self.piazza_rpc.content_instructor_answer(params)
Example #16
0
class PiazzaBot():
	"""Logs into Piazza and prints the posts matching a fixed search."""

	def __init__(self):
		"""Log in with the module-level credentials and bind to the class network."""
		self.p = Piazza()
		self.p.user_login(USER_NAME, USER_PASS)
		profile = self.p.get_user_profile()
		self.uid = profile['user_id']

		# classes = self.p.get_user_classes()

		self.si206 = self.p.network("jlp6m1ynp9713y")

	def do_bot(self):
		"""Print the first-history content of every post found by searching for "lecture"."""
		matches = self.si206.search_feed("lecture")

		for match in matches:
			full_post = self.si206.get_post(match[u'id'])
			print(full_post[u'history'][0][u'content'])
			print("\n\n")
Example #17
0
def comment_post_aggregate(email, password, coursecode):
    """Return the tag-stripped questions followed by the answers of a course.

    The course code is resolved through ``course_hash``.  For each post (at
    most ``const_limit``) the first history entry is the question; when the
    first child has a history, every string revision of it is collected as an
    answer.  The result is the list of questions with the answers appended.
    """
    session = Piazza()
    session.user_login(email, password)

    # Mapping from course code to the network hash.
    hash_by_course = course_hash(email, password)
    board = session.network(hash_by_course[coursecode])

    questions = []
    answers = []

    # iter_all_posts is lazy; it can only be walked, not indexed.
    for post in board.iter_all_posts(limit=const_limit):
        questions.append(strip_tags(post["history"][0]["content"]))

        # Skip posts without an answer-like first child.
        if ("children" not in post.keys() or not post["children"]
                or "history" not in post["children"][0]):
            continue

        for revision in post["children"][0]["history"]:
            # Only plain string revisions can be stripped.
            if type(revision["content"]) is str:
                answers.append(strip_tags(revision["content"]))

    return questions + answers
Example #18
0
def pubish_to_piazza(message_contents):
    """Post the recording URL and access password found in a message to Piazza.

    Args:
        message_contents: mapping whose ``["payload"]["body"]["data"]`` holds
            the URL-safe base64 encoded, UTF-8 message body.

    Raises:
        IndexError: if the body contains fewer than three URLs or no
            ``Access Password: `` entry.
    """
    # Get the meeting URL + meeting password
    raw_body = base64.urlsafe_b64decode(
        message_contents["payload"]["body"]["data"])
    # The previous replace("&", "&") changed nothing.  Presumably the body is
    # HTML-escaped and the intent was to restore ampersands so that URLs with
    # query strings stay intact - confirm against a real message.
    message_body = raw_body.decode("utf-8").replace("&amp;", "&")
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    meeting_url = re.findall(r'(https?://[^\s]+)', message_body)[2]
    meeting_password = re.findall(r'Access Password: (.{8})', message_body)[0]

    # Post them to Piazza
    piazza_message_content = f"Recording URL: {meeting_url}\nMeeting Password: {meeting_password}"
    piazza_client = Piazza()
    piazza_client.user_login(email=PIAZZA_EMAIL, password=PIAZZA_PASSWORD)
    piazza_class = piazza_client.network(PIAZZA_CLASS_NETWORK_ID)
    piazza_class.create_post(post_type="note",
                             post_folders=["logistics"],
                             post_subject="New Recording Available",
                             post_content=piazza_message_content)

    print("Uploaded new recording to Piazza!")
Example #19
0
def get_headlines():
    """
    Description: Retrieve the first few Piazza posts of the class and
    concatenate them into one string.

    Parameters:
        None

    Return:
        The string for alexa to read out.  Consecutive posts are separated by
        'The next post reads... '.  When the class has fewer posts than
        requested, only the available ones are included (an empty string if
        there are none) instead of failing with StopIteration.

    """
    from itertools import islice

    # retrieves login information
    with open('config.json') as login_info:
        user_pass_dict = json.load(login_info)

    # creates piazza object and logs into account
    p = Piazza()
    p.user_login(user_pass_dict["user"], user_pass_dict["passwd"])

    # goes to specific class information
    class_board = p.network("jsux9glwaxm4m")

    number_of_posts = 3
    data = class_board.iter_all_posts(number_of_posts)

    # islice caps the count and simply stops early on a short feed.
    spoken = [
        concatenate_post(current_post)
        for current_post in islice(data, number_of_posts)
    ]
    return 'The next post reads... '.join(spoken)
Example #20
0
def main():
    """Wire Piazza and Slack together and start watching for new posts.

    The highest post id currently in the feed is the starting point handed to
    ``check_for_new_posts``.
    """
    # All settings come from the environment-backed config.
    settings = config_env()

    # Piazza side
    client = Piazza()
    client.user_login(email=settings.PIAZZA_EMAIL, password=settings.PIAZZA_PASSWORD)
    network = client.network(settings.PIAZZA_ID)

    # Slack side
    slack = Slacker(settings.SLACK_TOKEN)

    # Newest post already present when the bot starts
    newest_id = get_max_id(network.get_feed()['feed'])
    base_url = "https://piazza.com/class/{}?cid=".format(settings.PIAZZA_ID)

    # Hand over to the polling loop
    check_for_new_posts(network, slack, settings.SLACK_BOT_NAME,
                        settings.SLACK_CHANNEL, newest_id, base_url)
Example #21
0
def main(piazza_class, output_filename):
    """Scrape all question posts of a class into a '@@@'-delimited text file.

    Uses the Piazza API (https://github.com/hfaran/piazza-api) to log in
    interactively and fetch all posts of the class.

    Args:
        piazza_class: key into ``CLASS_NETWORK_IDS`` selecting the class.
        output_filename: file to (over)write; one line per question post in
            the form ``title@@@question@@@answer@@@timestamp@@@tags``.

    Posts that cannot be encoded for the output file are reported and left
    out of the final count.
    """
    p = Piazza()
    p.user_login()
    network = p.network(CLASS_NETWORK_IDS[piazza_class])
    posts = network.iter_all_posts()

    num_posts = 0
    # The context manager closes (and flushes) the file even if scraping fails.
    with open(output_filename, 'w') as f:
        for post in posts:
            if 'type' not in post:
                continue
            if unicode2str(post['type']) != 'question':
                continue

            title = ''
            question = ''
            answer = ''
            timestamp = unicode2str(post['created'])
            tags = ' '.join(post['tags'])

            if post['history']:
                if post['history'][0]['subject']:
                    title = unicode2str(post['history'][0]['subject'])
                if post['history'][0]['content']:
                    question = unicode2str(post['history'][0]['content'])
            if post['children']:
                if post['children'][0]:
                    if 'history' in post['children'][0]:
                        answer = unicode2str(
                            post['children'][0]['history'][0]['content'])

            # Each post is a single line, with its parts delimited by '@@@'.
            try:
                f.write(title + '@@@' + question + '@@@' + answer + '@@@' +
                        timestamp + '@@@' + tags + '\n')
            except UnicodeEncodeError as err:
                print(err)
            else:
                num_posts += 1
    print('Scraped %s posts\n' % num_posts)
Example #22
0
from piazza_api import Piazza
import json
import sys

# Dump the first-history content of every post of the class given as the first
# command-line argument into posts183.json, keyed by post number.
p = Piazza()
p.user_login()

course = p.network(sys.argv[1])

mapSave = {}

posts = course.iter_all_posts(limit=100000000000)
for post in posts:
    content = post["history"][0]["content"]
    post_number = post["nr"]
    print(post_number)
    mapSave[post_number] = content

# json.dumps returns text, so the file must be opened in text mode.
with open("posts183.json", "w") as f:
    f.write(json.dumps(mapSave))
Example #23
0
    class_code = input('Please enter the course code or enter to start parsing. To find the course code, visit the Piazza class and copy the last part of the URL (i.e for https://piazza.com/class/asdfghjkl, paste in \'asdfghjkl\'): ')
    if class_code == '':
        add_another_class = False
        continue
    class_name = input('Please enter the class name for this code: ')
    classes[class_code] = class_name
    continue_input = input(
        'Enter "begin" to start parsing the Piazza networks OR press enter to add another class: ')
    if continue_input == 'begin':
        add_another_class = False

total_posts = 0

# TODO refactor this procedural logic into a more readable class
for class_code, class_name in classes.items():
    piazza_class = p.network(class_code)
    output_file = class_name + '_network.gexf'

    feed = piazza_class.get_feed(limit=10000)
    cids = [post['id'] for post in feed["feed"]]
    edges = dict()
    nodes = set()
    node_sizes = dict()
    node_interactions = dict()

    post = None

    rootLogger.info('Parsing class %s with code %s', class_name, class_code)
    i = 0

    for post_id in cids:
Example #24
0
import logging
import json
logging.captureWarnings(True)

# sys.argv[0] is the script name, so a single user-supplied argument already
# makes len(sys.argv) == 2.
if len(sys.argv) > 1:
    search_query = sys.argv[1]
else:
    search_query = "Nothing"

p = Piazza()

# The whole content of password.txt is used as the password.
with open("password.txt") as f:
    password = f.read()


p.user_login("*****@*****.**", password)

course = p.network("i4skbzt4mxk3ck")
# Maps each post subject to the link of that post.
data = {}
with open("test.txt", "w") as g:

    for i in course.iter_all_posts(limit=100):
        subject = i.get("history")[0]["subject"]
        url = "piazza.com/class/i4skbzt4mxk3ck?cid=%d" % i["nr"]
        print(subject)
        print(url)
        data[subject] = url

    json.dump(data, g)

groupname = args.groupname

content = "What chemistry topics were new to you this past week? What was "\
        "confusing?\n\n Many students have the same confusions, so take a "\
        "look at what your peers have answered. See something you understand "\
        "that they don't? Start a discussion! See something you are also "\
        "confused about? Tell them why!\n\n Post your response in this thread"\
        " before Tuesday's lecture.\n\n #pin"

print('Connecting to Piazza via piazza-api...')

# Piazza setup
p = Piazza()
p.user_login(email=email, password=None)
me = p.get_user_profile()
pclass = p.network(classid)
print('  Logged in as: %s' % me.get('name'))
print('')

for i in range(nsects):
    if (groupname != ''):
        thistitle = '{} {}{}'.format(title, groupname, i + 1)
        disc = groupname + ' ' + str(i + 1)
        params = {
            'status': active,
            'type': 'note',
            'folders': [disc],
            'subject': thistitle,
            'content': content,
            'config': {
                'feed_groups': disc.lower() + '_' + classid
Example #26
0
class CaveBot():
    """Piazza bot that indexes a class into Elasticsearch and recommends related posts.

    Constructing an instance runs the whole pipeline: log in, bulk-index every
    post, dump the posts to ``cs3169.txt``, load the post-number/corpus-index
    maps from pickle files, train an LDA model and finally call
    :meth:`recommend` in debug mode (print instead of post).
    """

    def __init__(self, username, password, network_id, network_name):
        """Log in, index the network ``network_id`` and produce recommendations.

        ``network_name`` is used as the Elasticsearch index name.
        """
        self.p = Piazza()
        self.p.user_login(username, password)

        self.user_profile = self.p.get_user_profile()
        self.network = self.p.network(network_id)
        self.network_name = network_name

        self.es = Elasticsearch()
        self.s = Search(using=self.es, index=self.network_name)
        self.jobs = []
        self.es_index_name = network_name

        self.index2nr = None
        self.nr2index = None
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = None, None, None, None

        self.topic_vectors = None

        # Consuming the generator also fills self.jobs as a side effect.
        bulk(self.es, self.index_all_with_recommendations())

        #  For debugging purposes and room for future play, reread every post, put into a .txt file and let lda use that.
        piazza_class2txt.main(self.network, "cs3169.txt")

        # NOTE(review): pickle.load can execute arbitrary code; these files
        # must only ever come from a trusted local step - confirm who writes them.
        with open("index2nr.pkl", "rb") as file:
            self.index2nr = pickle.load(file)

        with open("nr2index.pkl", "rb") as file:
            self.nr2index = pickle.load(file)

        self.num_topics = 3
        self.train_iter = 50
        self.corpus, self.lda_model, self.id2word, self.cleaned_documents = refined_lda.main(
            "cs3169.txt",
            num_topics=self.num_topics,
            iterations=self.train_iter)

        self.recommend()

    @staticmethod
    def main():
        """Command-line entry point; expects exactly four arguments after the script name."""
        if len(sys.argv) != 5:
            print(
                "Usage: python3 cave_bot.py username password network_id network_name"
            )
            # Without the right arguments the constructor call below cannot succeed.
            return

        CaveBot(*sys.argv[1:])

    # Recommends once per post, responds to parent answers only
    def index_all_with_recommendations(self):
        """Yield one Elasticsearch bulk "index" action per post of the network.

        While iterating, posts that ask for a recommendation ("!RecommendMe"
        in the post or one of its direct children) are appended to
        ``self.jobs`` unless a direct child starts with the marker "!@#$".
        """
        for post in self.network.iter_all_posts(limit=None):
            latest_state = post["history"][0]
            trigger = ("!RecommendMe" in latest_state["subject"]
                       or "!RecommendMe" in latest_state["content"])

            # Doesn't look for its children
            for child in post["children"]:
                # Children either carry a revision history or only a subject.
                if "history" in child:
                    text = child["history"][0]["content"]
                else:
                    text = child["subject"]
                if "!RecommendMe" in text:
                    trigger = True
                # "!@#$" is the prefix recommend() puts on its own responses.
                if "!@#$" in text[:7]:
                    trigger = False
                    break

            if trigger:
                self.jobs.append(post)

            i_answer = ""
            s_answer = ""
            for child in post["children"]:
                if child["type"] == "i_answer":
                    i_answer = child["history"][0]["content"]
                if child["type"] == "s_answer":
                    s_answer = child["history"][0]["content"]

            yield {
                "_op_type": "index",
                "_index": self.es_index_name,
                "_id": post["nr"],
                "_source": {
                    "subject": latest_state["subject"],
                    "content": latest_state["content"],
                    "i_answer": i_answer,
                    "s_answer": s_answer,
                    "responded": trigger
                }
            }

    def recommend(self,
                  debug=True):  # default to true to not accidentally post
        """Build a recommendation text for every queued post.

        With ``debug`` true the text is printed; otherwise it is posted as a
        follow-up on the network.
        """
        for post in self.jobs:
            response = "!@#$\n"
            topic = self.topic_of(self.nr2index[post["nr"]])
            response += "Topic of this post: " + str(topic) + "\n"

            response += "Contributive contents: " + str([
                "(" + pair[0] + ", " + str(pair[1])[:4] + ")"
                for pair in self.lda_model.show_topic(topic)
            ]) + "\n"

            response += "Posts with same topic: " + str(
                self.get_posts_with_same_topic(post["nr"], topic))

            if not debug:
                # NOTE(review): the target is the second child of the post;
                # the original remark says "both nr and entire post works" -
                # confirm this is the intended follow-up target.
                self.network.create_followup(post["children"][1], response)
            else:
                print("#### Start Post ####")
                print(response)
                print("#### End Post ####")
                print("train_iter, num_topics: ", self.train_iter,
                      self.num_topics)

    # deprecated and bad
    def get_posts_with_same_best_topic(self,
                                       post_number,
                                       target_topic,
                                       num_docs=3):
        """Return up to ``num_docs`` "@nr" references to nearby posts sharing ``target_topic``.

        Post numbers are probed alternately above and below ``post_number``,
        at most 100 steps away in each direction.
        """
        # do one up one down
        docs = []
        looked = 0
        # Stop once enough posts are found OR the search radius is exhausted;
        # the previous "or looked > 100" condition could never end the loop.
        while len(docs) < num_docs and looked < 100:
            looked += 1
            nrs = [post_number + looked, post_number - looked]
            for nr in nrs:
                if nr in self.nr2index and nr != -1 and self.topic_of(
                        self.nr2index[nr]) == target_topic:
                    docs.append("@" + str(nr))

        return docs[:num_docs]

    # extra atrocious
    def get_posts_with_same_topic(self, number, target_topic, num_docs=3):
        """Return "@nr" references to the posts closest to ``number`` in topic space.

        Closeness is the Euclidean distance between topic vectors.
        ``target_topic`` is accepted but not used.  The post itself is
        excluded; at most ``num_docs`` references are returned, in no
        particular order.
        """
        self.get_topic_vectors()
        vector = self.topic_vectors[self.nr2index[number]]

        # One extra slot because the post itself (distance 0) normally
        # occupies one of them.
        min_dists = [float("inf") for i in range(num_docs + 1)]
        min_indices = [0 for i in range(num_docs + 1)]
        for i in range(len(self.topic_vectors)):
            distance = np.sqrt(np.sum((vector - self.topic_vectors[i])**2, 0))
            if distance < max(min_dists):
                i_md = min_dists.index(max(min_dists))
                print(i, distance)
                min_dists[i_md] = distance
                min_indices[i_md] = i

        # Slots that were never filled still hold the placeholder index 0.
        posts = [
            "@" + str(self.index2nr[index])
            for dist, index in zip(min_dists, min_indices)
            if dist != float("inf")
        ]
        own = "@" + str(number)
        # The post can be missing when enough other posts tie at distance 0.
        if own in posts:
            posts.remove(own)  # no need to recommend the same post
        return posts[:num_docs]

    def topic_of(self, document):
        """Return the id of the most probable topic of corpus entry ``document``."""
        # The model may omit low-probability topics, so the position in the
        # returned list is not the topic id; take the id from the pair itself.
        topic_id, _ = max(self.lda_model[self.corpus[document]],
                          key=lambda pair: pair[1])
        return topic_id

    def get_topic_vectors(self):
        """Store one dense ``num_topics``-long weight vector per corpus entry in ``self.topic_vectors``.

        Topics the model does not report for a document get weight 0.
        """
        docs_topics = []

        for bow in self.corpus:
            weights = dict(self.lda_model.get_document_topics(bow))
            docs_topics.append(
                np.array([weights.get(topic, 0)
                          for topic in range(self.num_topics)]))

        self.topic_vectors = docs_topics

    def recommend_with_mlt(self, post, score_limit=0):
        """Return ``(id, score)`` pairs of indexed posts similar to ``post``.

        Uses an Elasticsearch more_like_this query on the post's latest
        subject and content; the post itself and hits scoring at or below
        ``score_limit`` are left out.
        """
        latest_state = post["history"][0]
        # Can do post likelyhood later
        search_text = latest_state["subject"] + latest_state["content"]

        # maybe static this later
        mlt_query = {
            "more_like_this": {
                "fields": ["subject", "content", "i_answer", "s_answer"],
                "like": search_text,
                "min_term_freq": 1,
                "min_doc_freq": 1,
                "max_query_terms": 50,
                # "term_vector": "with_positions_offsets"
            }
        }

        recommendation = []
        docs = self.s.query(mlt_query).execute()

        for doc in docs:
            if int(doc.meta["id"]) != int(
                    post["nr"]) and doc.meta["score"] > score_limit:
                recommendation.append((doc.meta["id"], doc.meta["score"]))

        return recommendation

    def change_network(self, network_id):
        """Switch to another Piazza network; the index name stays ``self.network_name``."""
        self.network = self.p.network(network_id)
        self.s = Search(using=self.es, index=self.network_name)
        self.es_index_name = self.network_name
Example #27
0
                print info
                f2.write(time.strftime("%c") + "   -   " + info + "\n\n")
                sys.exit()
            # separating topics, fixed.
            topics = [x.strip() for x in data[0].split(',')]

            ## Generate Queries File
            generatequeries(topics)

            target_course = str(data[1].strip())
            last_cid = int(data[2].strip())

            f2.close()
            f1.close()

    course = p.network(target_course)

    result = []
    with open("./postsdataset/transit.dat", "w") as f:
        max_cid = 0
        # get limit+1 posts. E.g. limit=10 will only get you 9 posts
        posts = course.iter_all_posts(limit=51)

        for post in posts:
            if "#pin" in str(post["history"][0]):
                continue

            cid = str(post["nr"])
            max_cid = max(max_cid, int(cid))
            if int(cid) == last_cid:
                break
from piazza_api import Piazza
# Log in to Piazza and print the content of one post's first history entry.
p=Piazza()
# NOTE(review): credentials are hard-coded in source -- move them to
# configuration / environment and rotate the password.
p.user_login('*****@*****.**','Cxy3020840!')
eece210 = p.network("ieby1xzit8r1ki")
# Fetch post number 29 of that network.
post=eece210.get_post(29)
# First entry of the post's "history" list.
s=post['history'][0]
# Python 2 print statement.
print s['content']
import json
from time import sleep

from piazza_api import Piazza

import config

# Credentials come from the local `config` module.
username = config.username
password = config.password
class_id = 'is0q8vy2dsm6yx'
p = Piazza()
p.user_login(email=username, password=password)
cis121 = p.network(class_id)

# Fetch the class statistics.
stats = cis121.get_statistics()
# Total number of posts, computed by summing the per-day 'questions' counts
# found under stats['daily'].
total_num_posts = sum(d['questions'] for d in stats['daily'])

# Load the locally cached "class database"; fall back to an empty dict when
# the file is missing or does not contain valid JSON.
try: 
    with open(config.LOCAL_DATA_FILE_NAME, 'r') as data_file:
        try:
            current_data = json.load(data_file)
        except ValueError: # empty (or otherwise non-JSON) file
            current_data = {}
except IOError: # file didn't exist / could not be opened
    current_data = {}

# Initial values for the code that follows (not visible in this excerpt).
counter = 0
limit = 10
Example #30
0
    def get(self, course_id, operation):
        """Handle a GET request for one course.

        ``operation`` selects what is returned:

        * ``'get'``            - course name and Piazza class ID
        * ``'getname'``        - course name only (deprecated: use ``'get'``)
        * ``'getpiazza'``      - Piazza class ID only (deprecated: use ``'get'``)
        * ``'getpiazzaposts'`` - a streamed response with every post of the
          course's Piazza class, each post serialized with ``json.dumps``

        Parameters:
            course_id - value convertible with ``int()``; used as the
                        ``course_id`` key in the ``courses`` table
            operation - one of the operation names listed above

        Returns:
            A ``jsonify`` response for the lookup operations, or a streaming
            ``Response`` for ``'getpiazzaposts'``.

        Raises:
            InvalidUsage - unknown operation, no matching course row, missing
                           / malformed / rejected Piazza credentials, or a
                           Piazza class that cannot be read
        """
        if operation == 'get':
            cursor = g.db.execute('''SELECT course_name, piazza_cid
                                     FROM courses
                                     WHERE course_id=(?)''',
                                  [int(course_id)])

            row = cursor.fetchone()
            if row is None:
                raise InvalidUsage('Given course does not exist')
            return jsonify(message='Returning course info',
                           course_id=course_id,
                           course_name=row['course_name'],
                           piazza_cid=row['piazza_cid'])

        # @deprecated: use 'get' end point instead
        elif operation == 'getname':
            cursor = g.db.execute('''SELECT course_name
                                     FROM courses
                                     WHERE course_id=(?)''',
                                  [int(course_id)])

            course_name_row = cursor.fetchone()
            if course_name_row is None:
                raise InvalidUsage('Given course does not have a name')
            return jsonify(message='Returning name for course',
                           course_id=course_id,
                           course_name=course_name_row['course_name'])

        # @deprecated: use 'get' end point instead
        elif operation == 'getpiazza':
            cursor = g.db.execute('''SELECT piazza_cid
                                     FROM courses
                                     WHERE course_id=(?)''',
                                  [int(course_id)])

            piazza_id_row = cursor.fetchone()
            if piazza_id_row is None:
                raise InvalidUsage('Given course does not have a Piazza ID')
            return jsonify(message='Returning piazza ID for course',
                           course_id=course_id,
                           piazza_cid=piazza_id_row['piazza_cid'])

        elif operation == 'getpiazzaposts':
            cursor = g.db.execute('''SELECT piazza_cid
                                     FROM courses
                                     WHERE course_id=(?)''',
                                  [int(course_id)])
            piazza_id_row = cursor.fetchone()
            if piazza_id_row is None:
                raise InvalidUsage('Given course does not have a Piazza ID')

            piazza_id_str = piazza_id_row['piazza_cid']
            p = Piazza()
            try:
                # USER_FILE holds the e-mail on its first line and the
                # password on its second line.
                with open(USER_FILE, 'r') as cred_file:
                    lines = cred_file.read().split('\n')
                    p.user_login(email=lines[0], password=lines[1])
                piazza_class = p.network(piazza_id_str)
            except IndexError:
                raise InvalidUsage('Piazza credentials are improperly formatted',
                                   status_code=501)
            except IOError:
                raise InvalidUsage('Unable to find piazza credentials',
                                   status_code=500)
            except AuthenticationError:
                raise InvalidUsage('Invalid piazza credentials')

            # Attempt to pull a single post before streaming starts. If it
            # doesn't work, report the error now instead of part-way through
            # the streamed response.
            try:
                for _ in piazza_class.iter_all_posts(limit=1):
                    pass
            except RequestError:
                raise InvalidUsage('Invalid piazza course ID',
                                   status_code=500)

            def get_posts():
                # NOTE(review): each post is emitted as its own JSON document
                # with no separator or enclosing array, so the body as a whole
                # is not one valid JSON value even though the mimetype is
                # application/json -- confirm what clients expect before
                # changing the wire format.
                for post in piazza_class.iter_all_posts():
                    yield json.dumps(post)

            return Response(get_posts(), mimetype='application/json')
        else:
            raise InvalidUsage('Unknown operation type')
Example #31
0
from webargs.flaskparser import use_kwargs, parser
from flask_pymongo import PyMongo
import re
import requests
import json
import random

# Flask application, REST API wrapper and MongoDB handle used by this module.
app = Flask(__name__)
# NOTE(review): the connection URI (including credentials) is embedded in
# source -- move it to configuration / environment.
app.config[
    "MONGO_URI"] = "mongodb+srv://aws-lambda:[email protected]/test?retryWrites=true&w=majority"
api = Api(app)
mongo = PyMongo(app)

# Module-level Piazza session, logged in at import time, plus the network
# handle for one class.
# NOTE(review): `get` is a very generic module-level name, and the login
# credentials are hard-coded -- consider renaming and externalising them.
get = Piazza()
get.user_login("*****@*****.**", "hackgtdummy")
cs101 = get.network("k26wh1bxb6imp")

# Amazon Alexa API base URL and OAuth client credentials.
# NOTE(review): CLIENT_SECRET is committed in source -- rotate it and load it
# from a secret store.
BASE_AMAZON_URI = "https://api.amazonalexa.com/v1/"
CLIENT_ID = "amzn1.application-oa2-client.efe9993510944061af51f4e0980f3856"
CLIENT_SECRET = "38d0847f8a78c5fb7ddbeefd569f6a2cdb1a5a0852f22b880602ed1cd2f5882f"


def cleanhtml(raw_html):
    """Return ``raw_html`` with every ``<...>`` span removed.

    Matching is non-greedy and does not cross line breaks, so a ``<`` with no
    closing ``>`` on the same line is left untouched.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', raw_html)


# GET: Get all posts on piazza room
#
import pandas as pd
import json
import numpy as np
from piazza_api import Piazza 

# Load the configuration as a pandas Series; the keys read below are "user",
# "pass" and "network".
df_user = pd.read_json('config.json', typ="series");

p = Piazza()
p.user_login(df_user["user"], df_user["pass"])
cogs108 = p.network(df_user["network"])
# limit=None: no cap on the number of posts requested.
posts = cogs108.iter_all_posts(limit=None)

# Parallel lists: entry i of follow_ups belongs to entry i of dict_posts.
dict_posts = []
follow_ups = []

for post in posts:
  # To pretty-print the raw data instead:
  # print(json.dumps(post, indent=4, sort_keys=True))
  # (original author's note: the data already has user names anonymized to a
  # default value "no")

  # Skip posts whose status is 'private'.
  if post['status'] != 'private':
    # Content of the first entry in the post's history.
    dict_post1 = {'content': post['history'][0]['content']}
    dict_posts.append(dict_post1)
    # Number of direct children of the post.
    follow_ups.append(len(post['children']))

# Payload assembled for the output file opened below.
data = {
	"posts": dict_posts,
    "follow-ups": follow_ups
}

with open('posts.json', 'w') as outfile:
Example #33
0
class Scraper:
    """
    Scrapes one Piazza course into a local SQLite database (``test.db``).

    Usage for Scraper:

    >>> s = Scraper() # Initializes scraper/db connection
    >>> s.parse() # Refreshes statistics, then re-fetches recent posts
    >>> s.print_tags() # Prints list of current topics/tags
    >>> s.print_posts() # Prints all posts in DB
    """

    def __init__(self, days_refresh=10):
        """
        Logs in to Piazza with the credentials from ``Config`` and opens a
        database session.

        Parameters:
            days_refresh - posts newer than this many days are removed by
                           ``delete_recent`` so that they get re-fetched
        """
        self.piazza = Piazza()
        self.piazza.user_login(email=Config.username, password=Config.password)
        self.course = self.piazza.network(Config.courseid)
        self.engine = create_engine('sqlite:///test.db', echo=False)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()
        self.days = days_refresh

    def parse(self):
        """
        Full refresh: update the daily statistics, drop the recent posts and
        fetch them again from Piazza.
        """
        self.refetch_stats()
        print("Finished getting stats")
        self.delete_recent()
        self.get_recent_posts()

    def get_recent_posts(self):
        """
        Starts the scraper and stores posts to the database: this is the
        primary method to be used in scraping.

        Posts are processed in the order Piazza returns them; iteration
        stops at the first post for which ``process_one`` returns False
        (a non-pinned post that is already stored).

        Returns:
            None
        """
        for index, post in enumerate(self.course.iter_all_posts()):
            if not self.process_one(post):
                return
            print(index, post['history'][0]['subject'])

    def process_one(self, post):
        """
        Takes a post from the Piazza API,
        converts it into a format ready for the database,
        and stores it.

        A post counts as already stored when a ``Post`` row with the same
        creation time exists.

        Returns:
            True if scraping should continue (the post was stored, or it is
            an already-stored pinned post); False if the post is an
            already-stored, non-pinned post.
        """
        time = parse_time(post['created'])
        duplicate = self.session.query(Post).filter(Post.time == time).first()
        if duplicate is not None:
            # Pinned posts come first regardless of age, so hitting a stored
            # one does not mean the remaining posts are already stored.
            return post['bucket_name'] == 'Pinned'

        title = remove_tags(post['history'][0]['subject'])
        body = remove_tags(post['history'][0]['content'])
        views = post['unique_views']
        favorites = post['num_favorites']
        sqlpost = Post(title, body, time, views, favorites)

        # Adding Comments
        for comment in process_all_children(post):
            comment_time = parse_time(comment['created'])
            comment_type = comment['type']
            # Children without a 'history' list keep their text in 'subject'.
            if 'history' not in comment:
                subject = remove_tags(comment['subject'])
            else:
                subject = remove_tags(comment['history'][0]['content'])
            sqlpost.comments.append(Comment(subject, comment_time, comment_type))

        # Adding Tags
        for tag in post['tags']:
            sqlpost.tags.append(self.get_tag(tag))

        # Saving to Database
        self.session.add(sqlpost)
        self.session.commit()
        return True

    def get_tag(self, name):
        """
        Returns the tag with the given name if it exists, and otherwise
        creates (and commits) it
        """
        tag = self.session.query(Tag).filter(Tag.name == name).first()
        if not tag:
            tag = Tag(name)
            self.session.add(tag)
            self.session.commit()
        return tag

    def print_posts(self):
        """
        Prints a list of all posts currently
        in the database, each followed by its comments
        """
        posts = self.session.query(Post).all()
        for post in posts:
            print(post)
            # NOTE(review): process_one() stores comments on `post.comments`;
            # confirm that the Post model also exposes `children`.
            for comment in post.children:
                print("\t", comment)

    def print_tags(self):
        """
        Prints a list of tags currently registered
        """
        topics = self.session.query(Tag).all()
        for topic in topics:
            print(topic)

    def delete_all(self):
        """
        Deletes every Post, Comment and Tag row.

        The deletes are issued on the session but not committed here.
        """
        self.session.query(Post).delete()
        self.session.query(Comment).delete()
        self.session.query(Tag).delete()

    def delete_recent(self):
        """
        Deletes posts newer than ``self.days`` days.

        The delete is issued on the session but not committed here.
        """
        recent = datetime.now() - timedelta(days=self.days)
        self.session.query(Post).filter(Post.time > recent).delete()

    def refetch_stats(self):
        """
        Replaces the stored daily statistics with fresh ones from Piazza when
        none are stored or the newest stored day is more than one day old.
        """
        # Uses this instance's session (the original referenced a global `s`).
        mostrecent = (self.session.query(DayStats)
                      .order_by(DayStats.day.desc())
                      .first())
        stale = (mostrecent is None
                 or datetime.now() - mostrecent.day > timedelta(days=1))
        if stale:
            stats = self.course.get_statistics()
            self.session.query(DayStats).delete()
            for daily in stats['daily']:
                day = parse_day(daily['day'])
                self.session.add(DayStats(daily['questions'], day,
                                          daily['users'], daily['posts']))
            self.session.commit()
        print("Finished updating statistics")
Example #34
0
    def get_data(self):
        """Return the pieces collected in ``self.fed`` joined into one string."""
        collected = self.fed
        return ''.join(collected)


def strip_tags(html):
    """Feed ``html`` through a fresh ``MLStripper`` and return what it collected."""
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text


p = Piazza()
# NOTE(review): credentials are hard-coded in source -- see the TODO below.
p.user_login("*****@*****.**", "59170864aS@")
# TODO: ask the user for their credentials and log in to their Piazza account

stat200101 = p.network("jz1p1pgbr9e15c")
classes = p.get_user_classes()
print(classes)
print(stat200101)

# Fetch one post and strip the markup from its question text and from the
# text of its first child.
tempPost = stat200101.get_post("361")
tempPostContent = tempPost["history"][0]["content"]
strippedQuestion = strip_tags(tempPostContent)
# NOTE(review): assumes the post has at least one child and that the first
# child carries a "history" list -- confirm for posts without an answer.
tempPostAns = tempPost["children"][0]["history"][0]["content"]
strippedAns = strip_tags(tempPostAns)

# Containers for the questions / answers of all posts; they are filled by
# code that is not visible in this excerpt.
postQuestions = []
postAnswers = []
Example #35
0
from piazza_api import Piazza
import json
import sys

# Dump the content of every post of one Piazza class to posts183.json.
# Usage: the Piazza network (class) ID is taken from the first command-line
# argument.
p = Piazza()
# No credentials are passed here; presumably piazza_api prompts for them on
# the console -- TODO confirm.
p.user_login()

course = p.network(sys.argv[1])

# Maps post number ("nr") -> content of the post's first history entry.
mapSave = {}

posts = course.iter_all_posts(limit=100000000000)
for post in posts:
    content = post["history"][0]["content"]
    # Named post_nr rather than `id` so the builtin is not shadowed.
    post_nr = post["nr"]
    print(post_nr)
    mapSave[post_nr] = content

# json.dumps returns text, so the file must be opened in text mode; the
# original "wb" mode raises TypeError on Python 3.
with open("posts183.json", "w") as f:
    f.write(json.dumps(mapSave))
Example #36
0
import bs4


# In[2]:

p = Piazza()


# In[5]:

# No credentials are passed here; presumably piazza_api prompts for them on
# the console -- TODO confirm.
p.user_login()


# In[6]:

spark = p.network("i9esrcg0gpf8k")


# In[46]:

# Concatenate the plain text of every post into `out`; the numbers of posts
# that could not be fetched or parsed are collected in `failed`.
out = ""
failed = []
# NOTE(review): range() excludes its upper bound, so the post whose number
# equals the "questions" total is never requested -- confirm whether that is
# intended.
for i in range(1, spark.get_statistics()["total"]["questions"]):
    try:
        post = spark.get_post(i)
        # NOTE(review): no parser is named, so BeautifulSoup picks one itself.
        text = bs4.BeautifulSoup(post["history"][0]["content"]).getText()
        out = out + " " + text
    except Exception:
        # Deliberately best-effort: remember the failing post number and move
        # on. (`except Exception, e` was Python-2-only syntax and `e` was
        # never used.)
        failed.append(i)

Example #37
0
def get_all_online_data(piazza_class):
    """Download all posts of one Piazza class into ``<piazza_class>_posts.csv``.

    Credentials are read from ``config_file`` (username on the first line,
    password on the second). Every post is passed through
    ``get_relevant_fields``; posts for which it returns None are skipped, and
    for the others one CSV row is written with the columns listed in
    ``TITLE_FIELDS``. Progress is printed every 200 written rows.

    Parameters:
        piazza_class - short class name: "122_s17", "122_f15", "122_f16",
                       "381" or "601"

    Raises:
        ValueError - ``piazza_class`` is not one of the known names

    If a row cannot be written, the post's "cid" and the error are printed
    and the process exits with status 1.
    """
    # Appends this string to the class name when storing the data
    export_file_suffix = "_posts.csv"

    # Known classes: short name -> Piazza network ID.
    ### Extend this table to download a different class's data
    network_ids = {
        "122_s17": "ix087c2ns5p656",
        "122_f15": "idt0ymj51qr5do",
        "122_f16": "irz1akgnpve6eo",
        "381": "ixz5scp9zqi583",
        "601": "ixs4v2xr1cz10d",
    }

    # Read credentials
    with open(config_file) as f:
        creds = [x.strip() for x in f.readlines()]
    username = creds[0]
    password = creds[1]

    p = Piazza()
    p.user_login(username, password)

    if piazza_class not in network_ids:
        raise ValueError("Invalid class name")
    # Only the requested class gets a network handle.
    class_to_download = p.network(network_ids[piazza_class])
    export_file = piazza_class + export_file_suffix

    # Calling with no argument gets all posts
    all_responses = class_to_download.iter_all_posts()
    print("Finished getting all posts")

    count = 0

    # NOTE(review): binary mode is what Python 2's csv module expects; on
    # Python 3 this must become open(export_file, "w", newline="").
    # The `with` block closes the file on every exit path, including the
    # exit(1) below.
    with open(export_file, "wb") as out_file:
        csv_writer = csv.writer(out_file)

        # Write column headers of feature names
        csv_writer.writerow(TITLE_FIELDS)

        for response in all_responses:
            fields_dict = get_relevant_fields(response)

            if fields_dict is None:
                continue

            values = [fields_dict[k] for k in TITLE_FIELDS]

            try:
                csv_writer.writerow(values)
            except Exception as e:
                print("Failed on id= %s" % (fields_dict["cid"],))
                print(e)
                exit(1)

            if count % 200 == 0:
                print(count)
            count += 1
Example #38
0
class PiazzaHandler:
    """
    Handles requests to a specific Piazza network. Requires an e-mail and password, but if none are
    provided, then they will be asked for in the console (doesn't work for Heroku deploys). API is rate-limited
    (max is 55 posts in about 2 minutes?) so it's recommended to be conservative with fetch_max, fetch_min and only change them if necessary.

    All `fetch_*` functions return JSON directly from Piazza's API and all `get_*` functions parse that JSON.

    # todo missing docs for some attributes
    Attributes
    ----------
    name : `str`
        Name of class (ex. CPSC221)

    nid : `str`
        ID of Piazza forum (usually found at the end of a Piazza's home url)

    email : `str (optional)`
        Piazza log-in email

    password : `str (optional)`
        Piazza password

    guild : `discord.Guild`
        Guild assigned to the handler

    fetch_max : `int (optional)`
        Upper limit on posts fetched from Piazza.

    fetch_min: `int (optional)`
        Lower limit on posts fetched from Piazza. Used as the default value for functions that don't need to fetch a lot of posts
    """
    def __init__(self,
                 name: str,
                 nid: str,
                 email: str,
                 password: str,
                 guild: discord.Guild,
                 fetch_max: int = 55,
                 fetch_min: int = 30):
        self._name = name
        self.nid = nid
        self._guild = guild
        self._channels = []
        self.url = f"https://piazza.com/class/{self.nid}"
        self.p = Piazza()
        self.p.user_login(email=email, password=password)
        self.network = self.p.network(self.nid)
        self.fetch_max = fetch_max
        self.fetch_min = fetch_min

    @property
    def piazza_url(self) -> str:
        return self.url

    @piazza_url.setter
    def piazza_url(self, url: str) -> None:
        self.url = url

    @property
    def course_name(self) -> str:
        return self._name

    @course_name.setter
    def course_name(self, name: str) -> None:
        self._name = name

    @property
    def piazza_id(self) -> str:
        return self.nid

    @piazza_id.setter
    def piazza_id(self, nid: str) -> None:
        # Only the stored ID changes; `url` and `network` keep pointing at the
        # network given to __init__.
        self.nid = nid

    @property
    def guild(self) -> discord.Guild:
        return self._guild

    @guild.setter
    def guild(self, guild: discord.Guild) -> None:
        self._guild = guild

    @property
    def channels(self) -> List[int]:
        return self._channels

    @channels.setter
    def channels(self, channels: List[int]) -> None:
        self._channels = channels

    def add_channel(self, channel: int) -> None:
        """Registers `channel` unless it is already registered."""
        if channel not in self.channels:
            self._channels.append(channel)

    def remove_channel(self, channel: int) -> None:
        """Unregisters `channel` if it is registered."""
        if channel in self.channels:
            self._channels.remove(channel)

    def fetch_post_instance(self, post_id: int) -> dict:
        """
        Returns a JSON object representing a Piazza post with ID `post_id`

        Parameters
        ----------
        post_id : `int`
            requested post ID

        Raises
        ------
        InvalidPostID
            if Piazza rejects the request for the post, or if the post is private
        """

        try:
            post = self.network.get_post(post_id)
        except piazza_api.exceptions.RequestError as ex:
            raise InvalidPostID("Post not found.") from ex

        if self.check_if_private(post):
            raise InvalidPostID("Post is Private.")

        return post

    async def fetch_recent_notes(self, lim: int = 55) -> List[dict]:
        """
        Returns the posts from today (out of at most `lim` fetched) that are
        pinned or whose first tag is "instructor-note"

        Parameters
        ----------
        lim : `int (optional)`
            Upper limit on posts fetched. Must be in range [fetch_min, fetch_max] (inclusive)
        """

        posts = await self.fetch_posts_in_range(days=0,
                                                seconds=60 * 60 * 5,
                                                lim=lim)
        response = []

        for post in posts:
            tags = post["tags"]
            # Guard against posts without tags; indexing tags[0] directly
            # raised IndexError for them.
            is_instructor_note = bool(tags) and tags[0] == "instructor-note"

            if is_instructor_note or post["bucket_name"] == "Pinned":
                response.append(post)

        return response

    def fetch_pinned(self, lim: int = 0) -> List[dict]:
        """
        Returns the pinned, non-private posts among the first `lim` posts
        (`fetch_min` posts when `lim` is 0).
        Since pinned posts are always the first notes shown in a Piazza, lim can be a small value.

        Parameters
        ----------
        lim : `int`
            Upper limit on posts fetched. Must be in range [fetch_min, fetch_max] (inclusive)
        """

        posts = self.network.iter_all_posts(limit=lim or self.fetch_min)
        response = []

        for post in posts:
            if self.check_if_private(post):
                continue

            if post["bucket_name"] == "Pinned":
                response.append(post)

        return response

    async def fetch_posts_in_range(self,
                                   days: int = 1,
                                   seconds: int = 0,
                                   lim: int = 55) -> List[dict]:
        """
        Fetches up to `lim` posts from the feed and returns the non-private
        ones created at most `days` days before today

        Parameters
        ----------
        days : `int (optional)`
            Maximum age of a post in whole days

        seconds : `int (optional)`
            See the note in the body: currently has no effect for non-negative values

        lim : `int (optional)`
            Upper limit on posts fetched

        Raises
        ------
        ValueError
            if `lim` is negative
        """

        if lim < 0:
            raise ValueError(f"Invalid lim for fetch_posts_in_range(): {lim}")

        posts = []

        feed = self.network.get_feed(limit=lim, offset=0)

        for cid in map(itemgetter("id"), feed["feed"]):
            post = None
            retries = 5

            # Up to 5 attempts per post; a post that still fails is skipped.
            while not post and retries:
                try:
                    post = self.network.get_post(cid)
                except piazza_api.exceptions.RequestError as ex:
                    retries -= 1

                    # "foo fast" (sic) is presumably the text of Piazza's
                    # rate-limit error -- TODO confirm. Any other error is
                    # not retried.
                    if "foo fast" in str(ex):
                        await asyncio.sleep(1)
                    else:
                        break
            if post:
                posts.append(post)

        date = datetime.date.today()
        result = []

        for post in posts:
            # [2020,9,19] from 2020-09-19T22:41:52Z
            created_at = [int(x) for x in post["created"][:10].split("-")]
            created_at = datetime.date(created_at[0], created_at[1],
                                       created_at[2])

            if self.check_if_private(post):
                continue

            # NOTE(review): subtracting two dates yields a whole-day
            # timedelta, so `.seconds` is always 0 and the `seconds` bound
            # never filters anything when it is >= 0. Only the calendar date
            # from the timestamp is compared against the local date.
            age = date - created_at
            if age.days <= days and age.seconds <= seconds:
                result.append(post)

        return result

    def get_pinned(self) -> List[dict]:
        """
        Returns an array of objects containing a pinned post's post id, title, and url
        (pinned posts are searched among the first `fetch_min` posts).
        """

        posts = self.fetch_pinned()
        response = []

        for post in posts:
            post_details = {
                "num": post["nr"],
                "subject": post["history"][0]["subject"],
                "url": f"{self.url}?cid={post['nr']}",
            }
            response.append(post_details)

        return response

    def get_post(self, post_id: int) -> typing.Union[dict, None]:
        """
        Returns a dict that contains post information to be formatted and returned as an embed

        Parameters
        ----------
        post_id : `int`
            int associated with a Piazza post ID

        Raises
        ------
        InvalidPostID
            propagated from `fetch_post_instance` when the post cannot be fetched or is private
        """

        post = self.fetch_post_instance(post_id)

        if post:
            post_type = "Note" if post["type"] == "note" else "Question"
            response = {
                "subject": self.clean_response(post["history"][0]["subject"]),
                "num": f"@{post_id}",
                "url": f"{self.url}?cid={post_id}",
                "post_type": post_type,
                "post_body": self.clean_response(self.get_body(post)),
                "i_answer": None,
                "s_answer": None,
                "num_followups": 0
            }

            answers = post["children"]

            if answers:
                num_followups = 0

                for answer in answers:
                    if answer["type"] == "i_answer":
                        response["i_answer"] = self.clean_response(
                            self.get_body(answer))
                    elif answer["type"] == "s_answer":
                        response["s_answer"] = self.clean_response(
                            self.get_body(answer))
                    else:
                        # Every other child is a follow-up thread.
                        num_followups += self.get_num_follow_ups(answer)

                response.update({"num_followups": num_followups})

            # Join first, then fall back: joining the fallback string itself
            # produced "N, o, n, e" for posts without tags.
            response.update({"tags": ", ".join(post["tags"]) or "None"})
            return response
        else:
            return None

    def get_num_follow_ups(self, answer: dict) -> int:
        """Counts `answer` itself plus all of its nested children."""
        return 1 + sum(self.get_num_follow_ups(i) for i in answer["children"])

    async def get_posts_in_range(self,
                                 show_limit: int = 10,
                                 days: int = 1,
                                 seconds: int = 0) -> List[List[dict]]:
        """
        Fetches up to `fetch_max` posts in the given range and returns
        `[instructor_posts, student_posts]`, each a list of dicts with the
        keys "type", "num", "subject" and "url"

        Raises
        ------
        ValueError
            if `show_limit` is smaller than 1
        """
        if show_limit < 1:
            raise ValueError(
                f"Invalid showLimit for get_posts_in_range(): {show_limit}")

        posts = await self.fetch_posts_in_range(days=days,
                                                seconds=seconds,
                                                lim=self.fetch_max)
        instr, stud = [], []
        response = []

        def create_post_dict(post: dict, tag: str) -> dict:
            return {
                "type": tag,
                "num": post["nr"],
                "subject": self.clean_response(post["history"][0]["subject"]),
                "url": f"{self.url}?cid={post['nr']}"
            }

        def filter_tag(post: dict, arr: List[dict], tagged: str) -> None:
            """Appends the post to `arr` if any of its tags equals `tagged`"""

            for tag in post["tags"]:
                if tag == tagged:
                    arr.append(create_post_dict(post, tag))
                    break

        # first adds all instructor notes to update, then student notes
        # for student notes, show first 10 and indicate there's more to be seen for today
        for post in posts:
            filter_tag(post, instr, "instructor-note")

        if len(posts) - len(instr) <= show_limit:
            for post in posts:
                filter_tag(post, stud, "student")
        else:
            # Only the first show_limit + 1 posts are scanned here; this
            # branch guarantees that many posts exist.
            for i in range(show_limit + 1):
                filter_tag(posts[i], stud, "student")

        response.append(instr)
        response.append(stud)
        return response

    async def get_recent_notes(self) -> List[dict]:
        """
        Fetches `fetch_min` posts, filters out non-important (not instructor notes or pinned) posts and
        returns an array of corresponding post details
        """

        posts = await self.fetch_recent_notes(lim=self.fetch_min)
        response = []

        for post in posts:
            post_details = {
                "num": post["nr"],
                "subject": self.clean_response(post["history"][0]["subject"]),
                "url": f"{self.url}?cid={post['nr']}"
            }
            response.append(post_details)

        return response

    def check_if_private(self, post: dict) -> bool:
        """Returns True if the post's status is "private"."""
        return post["status"] == "private"

    def clean_response(self, res: str) -> str:
        """
        Strips HTML tags from `res`, unescapes HTML entities and shortens the
        result to 1000 characters plus a "Read more" marker when it is longer
        than 1024 characters. Text that ends up empty is replaced by a
        placeholder sentence.
        """
        # Strip the markup first: truncating the raw HTML could cut a tag in
        # half, and the regex would then leave the dangling "<..." fragment
        # in the output.
        res = html.unescape(re.sub("<.*?>", "", res))

        if len(res) > 1024:
            res = res[:1000] + "...\n\n *(Read more)*"

        if len(res) < 1:
            res += "An image or video was posted in response."

        return res

    def get_body(self, res: dict) -> str:
        """
        Returns the content of the first history entry of `res`

        Raises
        ------
        Exception
            if that content is empty
        """
        body = res["history"][0]["content"]

        if not body:
            raise Exception("Body not found.")

        return body
Example #39
0
p = Piazza()
# NOTE(review): credentials are hard-coded in source -- move them to
# configuration / environment.
p.user_login('*****@*****.**', 'thomaslau')

# Save the user profile to disk, then read it back as plain JSON data.
# `with` closes each handle even if dumping or loading fails.
with open('userProfile.txt', 'w') as f:
    json.dump(p.get_user_profile(), f)

with open('userProfile.txt') as rawUserData:
    jsonUserData = json.load(rawUserData)


# Directory the script was started from; the per-class folders are created
# relative to it.
masterPath = os.getcwd()

for i in jsonUserData["all_classes"]:
    classConnection = p.network(i)
    posts = classConnection.iter_all_posts(limit=100000)

    className = jsonUserData["all_classes"][i]["num"]
    if className == "CS 103":
        x = 0
    elif className == "CS 110":
        x = 0
    else:
        continue


    # reset to the relative root
    os.chdir(masterPath)
    if not os.path.exists(masterPath + "/" + className):
        os.mkdir(masterPath + "/" + className)