def get_answers(qid, site='stackoverflow.com'):
    if site.startswith('stacko'):   # Stack Overflow
        se = stackexchange.Site(stackexchange.StackOverflow)
    elif site.startswith('unix'):   # Unix & Linux Stack Exchange
        se = stackexchange.Site(stackexchange.UnixampLinux)
    elif site.startswith('sup'):    # Super User
        se = stackexchange.Site(stackexchange.SuperUser)
    elif site.startswith('ser'):    # Server Fault
        se = stackexchange.Site(stackexchange.ServerFault)
    else:
        # Previously this fell through with `pass`, leaving `se` unbound.
        raise ValueError('unsupported site: %s' % site)
    se.be_inclusive()
    question = se.question(qid)
    question_text = clean(question.title) + ' ' + clean(html2text(question.body))
    answers = []
    for answer in question.answers:
        answers.append(clean(html2text(answer.body)))
    return question_text, answers
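A hypothetical call of the helper above; the question ID is illustrative, and clean() and html2text() are assumed to be defined elsewhere in the module:

    # Illustrative usage only; the ID is made up for the example.
    question_text, answers = get_answers(11227809, site='stackoverflow.com')
    print(question_text)
    print('%d answers fetched' % len(answers))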
def test_resultset_independence(self):
    # repro code for bug #4 (thanks, beaumartinez!)

    # Create two different sites.
    a = stackexchange.Site('api.askubuntu.com')
    b = self.site

    # Create two different searches from the different sites.
    a_search = a.search(intitle='vim', pagesize=100)
    b_search = b.search(intitle='vim', pagesize=100)

    # (We demonstrate that the second search has a second page.)
    self.assertEqual(len(b_search.fetch_next()), 100)

    # Reset the searches.
    a_search = a.search(intitle='vim', pagesize=100)
    b_search = b.search(intitle='vim', pagesize=100)

    # Exhaust the first search.
    while len(a_search) > 0:
        a_search = a_search.fetch_next()

    # Try to get the next page of the second search. Before the fix it came
    # back empty; that was the bug.
    self.assertEqual(len(b_search.fetch_next()), 100)
def main():
    tags = sys.argv[1:] or _default_tags
    seen_ids = set()
    so = stackexchange.Site(stackexchange.StackOverflow)
    so.be_inclusive()
    query = stackexchange.QuestionsQuery(so)

    while True:
        questions = []
        for tag in tags:
            questions.extend(query.no_answers(pagesize=3, tagged=tag)[:3])

        for question in questions:
            if question.id not in seen_ids:
                notify(question.title)
                print '%s %3d %3d\t%s [%s] (%s)' % (
                    question.creation_date,
                    question.score,
                    question.view_count,
                    question.title,
                    ', '.join(question.tags),
                    question.url,
                )
                seen_ids.add(question.id)

        time.sleep(10)
def reputation(id):
    API_KEY = ")e55ob6fBvCtSTibWPyP*A(("
    site = stackexchange.Site(stackexchange.StackOverflow, API_KEY,
                              impose_throttling=True)
    user = site.user(id)
    recent = site.recent_questions()
    return render_template('details.html', uname=user.display_name, recent=recent)
def runStackExchangeLoad(self):
    # Parse the settings.cfg configuration file.
    config = ConfigParser.RawConfigParser()
    config.read('settings.cfg')

    # Read the StackExchange configuration keys.
    CLIENT_ID = config.get('StackExchange', 'client_id')
    CLIENT_SECRET = config.get('StackExchange', 'client_secret')
    APP_KEY = config.get('StackExchange', 'key')

    # Use the API key with the library.
    stack_exchange = stackexchange.Site(stackexchange.StackOverflow, app_key=APP_KEY)

    # Explanation from https://github.com/lucjon/Py-StackExchange:
    # With an API key, requests are limited to thirty per five seconds.
    # By default, the library will return an error before even making an HTTP
    # request if we go over this limit. Alternatively, you can configure it so
    # that it waits until it can make another request without returning an
    # error. To enable this behaviour, set the impose_throttling and
    # throttle_stop properties.
    stack_exchange.impose_throttling = True
    stack_exchange.throttle_stop = False

    # Read all the projects from MongoDB (the OSSRank database).
    projects = getProjects(config)

    # For each project, get top questions and tagged discussions for the last
    # N years from Stack Exchange sites.
    self.searchStackExchange(stack_exchange, config, projects)
    self.response.write("Num projects : " + str(len(projects)) + "<br/>")
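A minimal sketch of the two throttling modes that the comment block above describes; impose_throttling and throttle_stop are the Py-StackExchange attributes used throughout these examples, and the site object is built as in the snippet:

    so = stackexchange.Site(stackexchange.StackOverflow, app_key=APP_KEY)
    so.impose_throttling = True
    so.throttle_stop = True    # default: raise an error once the rate limit is hit
    so.throttle_stop = False   # alternative: wait until another request is allowed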
def getLastDiscussedDates():
    config_dict = Common_Utilities.read_config_file()  # read all config data
    user_api_key = config_dict["SO_TOKEN"]

    so = stackexchange.Site(stackexchange.StackOverflow, app_key=user_api_key,
                            impose_throttling=True)
    so.be_inclusive()

    data = loadLastDiscussedSOData()
    libraries = Library.objects.all()

    for library in libraries:
        tag = library.so_tag
        questions = so.questions(sort='creation', order='DESC',
                                 tagged=[tag, 'java'])

        dates_string = ""
        for i in range(0, 10):
            if questions is None or i >= len(questions):
                break
            if i > 0:
                dates_string += ';'
            dates_string += questions[i].creation_date.strftime(
                "%m/%d/%Y, %H:%M:%S") + " UTC"

        if len(dates_string) == 0:
            data[tag] = None
        else:
            data[tag] = dates_string

    saveData(data)
def requests(user):
    API_KEY = ")e55ob6fBvCtSTibWPyP*A(("
    site = stackexchange.Site(stackexchange.StackOverflow, API_KEY,
                              impose_throttling=True)
    uname = user
    user = site.user(user)
    recent = site.recent_questions()
    return render_template('requests.html', user=user, recent=recent,
                           site=site, uname=uname)
async def stack_overflow(self, ctx: Context, *, query: str) -> None:
    """Queries Stack Overflow and gives you the top results."""
    async with ctx.typing():
        site = stackexchange.Site(stackexchange.StackOverflow, StackExchangeToken)
        site.impose_throttling = True
        site.throttle_stop = False

        results = site.search(intitle=query)[:5]
        embed = Embed(title="StackOverflow search")
        embed.set_thumbnail(
            url=f"http://s2.googleusercontent.com/s2/favicons?domain_url={site.domain}")

        description = f"**Query:** {query}\n"
        if results:
            embed.color = Color.blue()
        else:
            embed.color = Color.red()
            description += "\nSorry, no results found for the given query."

        for result in results:
            # Fetch the question's data, including vote counts and answers.
            result = site.question(result.id, filter="!b1MME4lS1P-8fK")
            description += f"\n**[{result.title}](https://{site.domain}/q/{result.id})**"
            description += f"\n**Score:** {result.score}, **Answers:** {len(result.answers)}\n"

        embed.description = description
        await ctx.send(embed=embed)
async def stack(self, ctx: Context, siteName: str, *, query: str) -> None:
    """Queries the given StackExchange website and gives you the top results.
    siteName is case-sensitive."""
    if siteName[0].islower() or siteName not in dir(se):
        await ctx.send(
            f"{siteName} does not appear to be in the StackExchange network."
            " Check the case and the spelling.")
        # Without this return, the lookup below would raise AttributeError.
        return

    site = se.Site(getattr(se, siteName), self.bot.config["SE_KEY"])
    site.impose_throttling = True
    site.throttle_stop = False

    qs = site.search(intitle=query)[:3]
    if qs:
        emb = discord.Embed(title=query)
        emb.set_thumbnail(
            url=f"http://s2.googleusercontent.com/s2/favicons?domain_url={site.domain}")
        emb.set_footer(text="Hover for vote stats")
        for q in qs:
            # Fetch the question's data, including vote counts and answers.
            q = site.question(q.id, filter="!b1MME4lS1P-8fK")
            emb.add_field(
                name=f"`{len(q.answers)} answers` Score : {q.score}",
                value=f"[{q.title}](https://{site.domain}/q/{q.id}"
                f' "{q.up_vote_count}🔺|{q.down_vote_count}🔻")',
                inline=False,
            )
        await ctx.send(embed=emb)
    else:
        await ctx.send("No results")
async def stackoverflow(self, ctx, *, text: str):
    """Queries StackOverflow and gives you top results"""
    so = se.Site(se.StackOverflow, SE_KEY)
    so.impose_throttling = True
    so.throttle_stop = False

    async with ctx.typing():
        qs = so.search(intitle=text)[:3]
        if qs:
            emb = discord.Embed(title=text)
            emb.set_thumbnail(
                url='https://cdn.sstatic.net/Sites/stackoverflow/company/img/logos/so/so-icon.png?v=c78bd457575a')
            emb.set_footer(text="Hover for vote stats")
            for q in qs:
                # Fetch the question's data, including vote counts and answers.
                q = so.question(q.id, filter="!b1MME4lS1P-8fK")
                emb.add_field(
                    name=f"`{len(q.answers)} answers` Score : {q.score}",
                    value=f'[{q.title}](https://stackoverflow.com/q/{q.id} '
                          f'"{q.up_vote_count}🔺|{q.down_vote_count}🔻")',
                    inline=False)
            await ctx.send(embed=emb)
        else:
            await ctx.send("No results")
def fetch_stackoverflow(config):
    # Get all Stack Overflow ids from member profiles.
    logger.info('Fetch data from stackoverflow...')
    logger.info('...Get stackoverflow ids from Plone.')
    url = (config.get('general', 'plone_url') +
           '/@@contributor-stackoverflow-ids')
    r = requests.get(
        url,
        auth=get_auth(config),
        allow_redirects=False)  # don't redirect to the login form for unauth
    if r.status_code != 200:
        msg = ("Can't fetch stackoverflow user ids from plone. "
               "status: %s.") % r.status_code
        if r.status_code == 302:
            msg += ' This might be an authentication error.'
        logger.error(msg)
        logger.error('Cannot fetch stackoverflow data. Exit.')
        exit(1)
    stackoverflow_users = r.json()['data']

    logger.info('...Start getting data from stackoverflow.')
    plone_member_ids = check_debug_limit(stackoverflow_users.keys(),
                                         'stackoverflow users')
    stackoverflow = stackexchange.Site(stackexchange.StackOverflow)
    for member_id in plone_member_ids:
        stackoverflow_id = stackoverflow_users[member_id]
        activity = _so_activity_for_user(stackoverflow, stackoverflow_id,
                                         member_id)
        stackoverflow_users[member_id] = activity
    logger.info('Done.')
    return stackoverflow_users
def so(components):  # !so <search term>
    '''Search the Stack Overflow site and return the first question's
    title and URL.
    '''
    response = ''
    terms = components['arguments'].split('!so ')  # notice the trailing space
    if 1 == len(terms):  # no search term given
        response = 'Usage: !so <search term>'
    else:
        if terms[1].lstrip():
            so = stackexchange.Site(stackexchange.StackOverflow, api_key)
            try:
                qs = so.search(intitle=terms[1].lstrip())
            except urllib2.HTTPError, e:
                response = "The server couldn't fulfill the request!"
                if hasattr(e, 'reason'):  # pragma: no branch
                    response = response + '\r\nReason: ' + str(e.reason)
                if hasattr(e, 'code'):  # pragma: no branch
                    response = response + '\r\nCode: ' + str(e.code)
            else:
                if 1 <= len(qs):
                    response = qs[0].title + '\r\n' + qs[0].url
                else:
                    response = 'Not found: ' + terms[1]
        else:  # blank search term given
            response = 'Usage: !so <search term>'
    return response
def se_user(site, idvalue):
    try:
        cse = stackexchange.Site(site, None)
        user = cse.user(idvalue)
        return {'result': user}
    except Exception:
        return
def se_recent(site):
    try:
        cse = stackexchange.Site(site, None)
        questions = cse.recent_questions(pagesize=20, filter='_b')
        return [value.title for value in questions]
    except Exception:
        return
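A hypothetical use of the two helpers above; the user ID is illustrative:

    # Both helpers return None on failure, so check before using the result.
    result = se_user(stackexchange.StackOverflow, 22656)
    if result is not None:
        print(result['result'].display_name)
    titles = se_recent(stackexchange.StackOverflow)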
def fetch_questions_from_api(**options):
    # Options
    api_key = options.get('api_key', config.get('api_key'))
    accepted = options.get('accepted', config.get('accepted'))
    closed = options.get('closed', config.get('closed'))
    max_hours = options.get('max_hours', config.get('max_hours'))
    min_hours = options.get('min_hours', config.get('min_hours'))
    include_tags = options.get('include_tags', config.get('include_tags'))
    exclude_tags = options.get('exclude_tags', config.get('exclude_tags'))

    # API interface
    so = stackexchange.Site(stackexchange.StackOverflow, api_key)

    # Respect the rate limit. throttle_stop will throw an error when we hit
    # the limit, which we can catch. (This is the default behavior.)
    so.throttle_stop = True

    # Search for Stack Overflow questions.
    questions = so.search(accepted=accepted,
                          closed=closed,
                          fromdate=hours_ago_to_unix_timestamp(max_hours),
                          todate=hours_ago_to_unix_timestamp(min_hours),
                          tagged=include_tags,
                          nottagged=exclude_tags)
    return questions
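A hypothetical call of the function above; the option names mirror the config keys it reads, and all values are illustrative:

    # Illustrative values only; any omitted option falls back to the config.
    questions = fetch_questions_from_api(
        api_key='YOUR_KEY_HERE',
        accepted=False,
        closed=False,
        max_hours=24,              # questions no older than a day...
        min_hours=1,               # ...and at least an hour old
        include_tags=['python'],
        exclude_tags=['homework'],
    )
    for q in questions:
        print(q.title)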
def main(): """ Query SO and output to file """ user_api_key = 'YOURSTACKOVERFLOWAPIKEYHERE' tags = ['mesos', 'dcos', 'mesosphere'] # Query StackOverflow so_connect = stackexchange.Site(stackexchange.StackOverflow, app_key=user_api_key, impose_throttling=True) so_res = [] for tag in tags: for question in so_connect.questions(tagged=tag, body=True): so_res.append(question) for question in so_connect.search(intitle=tag): full_question = so_connect.question(question.id) so_res.append(full_question) output_file = open("stack.json", "w") if so_res: # Since we used both API's we may have dupes, remove them here deduped = {r.id: r for r in so_res}.values() # Get the JSON output field jsonq = [d.json for d in deduped] for jquestion in jsonq: output_file.write("{}\n".format(json.dumps(jquestion))) output_file.close()
def run(self):
    try:
        import stackexchange
    except ImportError as ie:
        raise ie

    so = stackexchange.Site(stackexchange.StackOverflow)
    corda = so.tag("corda")
    return {"stackoverflow_corda_questions": corda.count}
def about(user):
    API_KEY = ")e55ob6fBvCtSTibWPyP*A(("
    site = stackexchange.Site(stackexchange.StackOverflow, API_KEY,
                              impose_throttling=True)
    uname = user
    user = site.user(user)
    tags = user.tags.fetch()
    tag_len = len(tags)
    return render_template('about.html', user=user, tags=tags,
                           tag_len=tag_len, uname=uname)
def fav(user):
    API_KEY = ")e55ob6fBvCtSTibWPyP*A(("
    site = stackexchange.Site(stackexchange.StackOverflow, API_KEY,
                              impose_throttling=True)
    user = site.user(user)
    fav = user.favorites.fetch()
    return render_template('fav.html', user=user, fav=fav, site=site)
def __init__(self):
    self.site = stackexchange.Site(stackexchange.StackOverflow)
    self.site.be_inclusive()
    self.body = ""
    self.raw = ""
    self.qTree = None
    self.aList = []
    self.id = 0
    self.prob = []
def download():
    # Record a start time, so we can time the whole operation. I don't think
    # we care that much, I just normally add some timing code.
    time_start = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="the name of the output file")
    parser.add_argument("num_iter", help="total number of requests to make", type=int)
    parser.add_argument("--descriptions", help="get questions, tags, and descriptions")
    args = parser.parse_args()

    if args.descriptions:
        print "Fetching questions, tags, and descriptions."
        descriptions_on = True
    else:
        print "Fetching questions and tags."
        descriptions_on = False

    output_filename = args.out
    num_iter = args.num_iter

    # Set up a link to StackOverflow (as opposed to one of the many
    # StackExchange sites) using our API key.
    stackOverflow = stackexchange.Site(stackexchange.StackOverflow,
                                       '8D*yq20*d3XiEdEn46BmMQ((')

    # If we want to fetch the body of the posts as well, we need to specify
    # the request to include all data.
    if descriptions_on:
        stackOverflow.be_inclusive()

    # Set the throttling property to avoid being banned. With this enabled,
    # the Py-StackExchange wrapper will make sure we don't send requests at a
    # rate that would get us banned. However, the program just gets shut down
    # if we send requests too quickly, so we still have to add timing code
    # later.
    stackOverflow.impose_throttling = True
    stackOverflow.throttle_stop = False

    num_fetched = 0
    with open(output_filename, 'w') as outfile:
        for num in range(0, num_iter):
            # Add some timing code to make sure the script actually finishes
            # without being throttled. 100 is the max number of questions we
            # can request at once.
            questions = stackOverflow.questions(pagesize=100)
            num_fetched += 100
            for question in questions:
                outfile.write("title:{0} tags:".format(question.title.encode('utf-8')))
                outfile.write(','.join(question.tags).encode('utf-8'))
                if descriptions_on:
                    outfile.write("description:{0}".format(question.body.encode('utf-8')))
                outfile.write("\n")
            time.sleep(4)  # sleep for 4 seconds to ensure we don't get cut off

    time_done = time.time()
    print "Fetched {0} questions of {1} requested in {2} seconds.".format(
        num_fetched, (num_iter * 100), (time_done - time_start))
def index():
    date = numpy.floor(time.time())
    backtime = date - 200000

    so = stackexchange.Site(stackexchange.StackOverflow, 'Z)iD*QKTXBvP6HbHiwcukg((')
    stackexchange.web.WebRequestManager.debug = True
    so.impose_throttling = True
    so.throttle_stop = False

    questions = list(so.questions(sort=stackexchange.Sort.Creation,
                                  order=stackexchange.DESC,
                                  from_date=backtime,
                                  to_date=date))
    # questions = so.recent_questions(pagesize=10)
    return render_template('index.html', questions=questions)
def answers_to_question(id):
    #key = 'TIIINIXcEDncTZQbPSzsiA'
    #key = 'G96PVW9Zi*voBduDLhsecA(('
    #key = 'ol7GTKMQYYd9KBwDHqq9fg(('
    so = stackexchange.Site(stackexchange.StackOverflow, "G96PVW9Zi*voBduDLhsecA((")
    so.impose_throttling = True
    so.throttle_stop = False
    so.be_inclusive()
    question = so.question(id)
    return question.answers
def process(user):
    API_KEY = ")e55ob6fBvCtSTibWPyP*A(("
    site = stackexchange.Site(stackexchange.StackOverflow, API_KEY,
                              impose_throttling=True)
    uname = user
    user = site.user(user)
    question = request.form['question']
    rake_object = rake.Rake("SmartStoplist.txt", 3, 5, 1)
    keywords = rake_object.run(question)
    print "keywords: ", keywords
    recent = site.recent_questions()
    return render_template('postfinal.html', user=user, site=site,
                           keywords=keywords, question=question,
                           recent=recent, uname=uname)
def test_fetch_paged(self):
    user = stackexchange.Site(stackexchange.Programmers, API_KEY).user(USER_ID)
    answers = user.answers.fetch(pagesize=60)
    for answer in answers:
        # Dummy assert: we're really testing paging here, to make sure it
        # doesn't get stuck in an infinite loop. There very well may be a
        # better way of testing this, but it's been a long day and this does
        # the trick. (This used to test for the title's presence, but title
        # has been removed from the default filter.)
        self.assertTrue(answer.id is not None)
def query(res):
    so = ste.Site(ste.StackOverflow)
    stack_table = []
    for j, i in enumerate(so.search(intitle=res, sort='votes')):
        if j > 5:
            break
        if i.owner:
            stack_table.append([
                i.owner.profile_image, i.owner.display_name, i.score,
                i.title, i.link
            ])
    return stack_table
def download():
    # Record a start time, so we can time the whole operation. I don't think
    # we care that much, I just normally add some timing code.
    time_start = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="the name of the output file")
    parser.add_argument("--descriptions", help="get questions, tags, and descriptions")
    args = parser.parse_args()

    if args.descriptions:
        print "Fetching questions, tags, and descriptions."
        descriptions_on = True
    else:
        print "Fetching questions and tags."
        descriptions_on = False

    output_filename = args.out

    # Set up a link to StackOverflow (as opposed to one of the many
    # StackExchange sites) using our API key.
    stackOverflow = stackexchange.Site(stackexchange.StackOverflow,
                                       '8D*yq20*d3XiEdEn46BmMQ((')

    # If we want to fetch the body of the posts as well, we need to specify
    # the request to include all data.
    if descriptions_on:
        stackOverflow.be_inclusive()

    # Set the throttling property to avoid being banned. With this enabled,
    # the Py-StackExchange wrapper will make sure we don't send requests at a
    # rate that would get us banned. However, the program just gets shut down
    # if we send requests too quickly, so we still have to add timing code
    # later.
    stackOverflow.impose_throttling = True
    stackOverflow.throttle_stop = False

    duplicate_sets = initialize_list()
    with open(output_filename, 'w') as outfile:
        for dup_set in duplicate_sets:
            outfile.write("<begin_duplicate_set>\n")
            for dup in dup_set:
                question = stackOverflow.question(dup)
                outfile.write("<begin_title>\n{0}\n<end_title>\n <begin_tags>\n".format(
                    question.title.encode('utf-8')))
                outfile.write(','.join(question.tags).encode('utf-8'))
                outfile.write("\n<end_tags>\n")
                if descriptions_on:
                    outfile.write("<begin_body>\n{0}\n<end_body>\n".format(
                        question.body.encode('utf-8')))
                outfile.write("\n")
                #time.sleep(4)  # sleep for 4 seconds to ensure we don't get cut off
            outfile.write("<end_duplicate_set>\n")

    time_done = time.time()
    print "Fetched {0} duplicate sets in {1} seconds.".format(
        len(duplicate_sets), (time_done - time_start))
def userpage(userid):
    """ Stackoverflow user profile browsing
    :param userid:
    :return:
    """
    global data
    import stackexchange
    try:
        if "api_key" not in data:
            data["api_key"] = None
        userprofile = stackexchange.Site(stackexchange.StackOverflow,
                                         app_key=data["api_key"]).user(userid)
        # The original header print was redacted at the source; printing the
        # display name is a best-guess reconstruction.
        print(bold("\n User: ") + userprofile.display_name)
        print("\n\tReputation: " + userprofile.reputation.format())
        print_warning("\n\tBadges:")
        print("\t\t Gold: " + str(userprofile.gold_badges))
        print("\t\t Silver: " + str(userprofile.silver_badges))
        print("\t\t Bronze: " + str(userprofile.bronze_badges))
        print("\t\t Total: " + str(userprofile.badge_total))
        print_warning("\n\tStats:")
        total_questions = len(userprofile.questions.fetch())
        unaccepted_questions = len(userprofile.unaccepted_questions.fetch())
        accepted = total_questions - unaccepted_questions
        rate = accepted / float(total_questions) * 100
        print("\t\t Total Questions Asked: " + str(total_questions))
        print('\t\t Accept rate is: %.2f%%.' % rate)
        print('\nMost experienced on %s.' %
              userprofile.top_answer_tags.fetch()[0].tag_name)
        print('Most curious about %s.' %
              userprofile.top_question_tags.fetch()[0].tag_name)
    except urllib.error.URLError:
        print_fail("Please check your internet connectivity...")
        exit(1)
    except Exception as e:
        showerror(e)
        if str(e) == "400 [bad_parameter]: `key` doesn't match a known application":
            print_warning("Wrong API key... Deleting the data file...")
            del_datafile()
            exit(1)
        elif str(e) in ("not enough values to unpack (expected 1, got 0)",
                        "400 [bad_parameter]: ids"):
            global manual
            if manual == 1:
                print_warning("Wrong user ID specified...")
                helpman()
                exit(1)
            print_warning("Wrong user ID... Deleting the data file...")
            del_datafile()
            exit(1)
        # Reaches here when the rate limit is exceeded.
        print_warning("Stackoverflow exception. This might be caused by rate "
                      "limiting: http://stackapps.com/questions/3055/is-there-a-limit-of-api-requests")
        print("Use http://stackapps.com/apps/oauth/register to register a new API key.")
        set_api_key()
        exit(1)
def AskGuru(question):
    stackoverFlow = stackexchange.Site(stackexchange.StackOverflow)
    print("Asking: " + question)
    searchResults = stackoverFlow.search_advanced(pagesize=1, sort="relevance",
                                                  q=question, accepted=True, page=1)
    if searchResults:
        result = searchResults[0]
        messageStart = "Does this help? : \n \n"
        return messageStart + result.title + "\n\t\t\t" + result.link + "\n"
    else:
        return "Sorry, I cannot think of a good answer to that; try rephrasing it."
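A hypothetical invocation of the function above; the question text is illustrative:

    # AskGuru returns a formatted reply string either way, so just print it.
    reply = AskGuru("How do I reverse a list in Python?")
    print(reply)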
def main():
    try:
        site = stackexchange.Site(stackexchange.StackOverflow,
                                  'NGR16KWDHIjGDT4E0ZBt4w((')
        print "testtest 1"
        site.be_inclusive()
        print "testtest 2"
        count = 0
        conn = psycopg2.connect(
            "dbname='ry2294' user='******' password='******' host='w4111db.eastus.cloudapp.azure.com'")
        print "testtest 3"
        for q in site.questions(pagesize=100,
                                filter='!BHTP)Vsd03.5*FXL(yaiuPYpjIkySi'):
            try:
                try:
                    insertUser(site, q.owner_id, conn)
                except Exception as e:
                    print "Could not add user", e
                else:
                    body = BeautifulSoup(q.body).text  # TODO
                    selectquestion = 'select * from questions where questionid = %d;' % (q.id)
                    insertquestion = 'insert into questions values(%d, %d, \'%s\', \'%s\', \'%s\')' % (
                        q.id, q.owner_id, q.creation_date,
                        q.title.replace("'", "''"), body.replace("'", "''"))
                    cur = conn.cursor()
                    try:
                        cur.execute(selectquestion)
                    except Exception as e:
                        print e
                    else:
                        if cur.fetchone() is None:
                            cur.execute(insertquestion)
                            conn.commit()
                    for tag in q.tags:
                        insertTag(tag, conn)
                        insertTagged(tag, q.id, conn)
                    for ans in q.answers:
                        insertAnswer(ans, conn, q.id, site)
                count = count + 1
                if count >= 100:
                    break
            except Exception as e:
                print "Error ", e
    except Exception as e:
        print "test", e