from datetime import datetime

from stackapi import StackAPI

def get_stored_questions(api_name, start_date=datetime(2010, 1, 1), end_date=datetime(2018, 12, 18)):
    site = StackAPI('stackoverflow')
    site.max_pages = 1
    questions = site.fetch('questions', fromdate=start_date, todate=end_date, tagged=api_name)
    stored_quests = list()
    # walk the list of question dicts stored at questions['items']
    for question in questions['items']:
        # keep only the relevant key/value pairs in a new dict
        stored_quest = {
            "title": question.get("title"),
            "score": question.get("score"),
            "is_answered": question.get("is_answered"),
            "tags": question.get("tags"),
            "question_id": question.get("question_id"),
            "link": question.get("link")
        }
        stored_quests.append(stored_quest)
    return stored_quests
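# A minimal usage sketch for get_stored_questions above; the 'pandas' tag and
# the one-year window are illustrative assumptions, not part of the original.
questions = get_stored_questions('pandas',
                                 start_date=datetime(2015, 1, 1),
                                 end_date=datetime(2016, 1, 1))
for q in questions[:5]:
    # each entry carries only the fields selected in the function above
    print(q['score'], q['title'], q['link'])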
from nltk.stem import PorterStemmer
from stackapi import StackAPI, StackAPIError

# ps is assumed to be an NLTK PorterStemmer; the original defines it elsewhere
ps = PorterStemmer()

def identify_questions(tags, page_size=100, max_pages=1):
    ques_id = []
    ques_score = []
    ques_body = []
    # stem the query tags and join them into a semicolon-separated tag list
    for i in range(len(tags)):
        tags[i] = ps.stem(tags[i])
    query = ';'.join(tags)
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = page_size
        SITE.max_pages = max_pages
        questions = SITE.fetch('search', tagged=query, sort='relevance')
        for item in questions['items']:
            tags_ques = [ps.stem(t) for t in item['tags']]
            # count question tags that were not asked for...
            cnt = sum(1 for tag in tags_ques if tag not in tags)
            # ...then add the number of query tags the question is missing,
            # minus len(tags); cnt < 0 means more matches than misses
            temp = len(tags) - len(set(tags).intersection(tags_ques))
            cnt = cnt + (temp - len(tags))
            if cnt < 0:
                ques_id.append(item['question_id'])
                ques_score.append(cnt)
                ques_body.append(item['title'])
    except StackAPIError as e:
        print(e.message)
    return ques_id, ques_score, ques_body
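# Hypothetical use of identify_questions above; the tag list is an
# illustrative assumption. More negative scores mean a closer tag match.
ids, scores, titles = identify_questions(['python', 'pandas'])
for qid, score, title in zip(ids, scores, titles):
    print(qid, score, title)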
import html
import time

from stackapi import StackAPI

# One-day window; an assumption here, since the original defines
# tsInicio/tsHoje elsewhere as Unix timestamps
tsHoje = int(time.time())    # "today"
tsInicio = tsHoje - 86400    # start of the window, one day earlier

def buscar_questoes(tag="python"):
    # set up pt.stackoverflow
    sopt = StackAPI("pt.stackoverflow")
    # configure the number of results
    sopt.page_size = 100
    sopt.max_pages = 1
    resultado = []
    # fetch questions for the tag over a time window (currently one day)
    questoes_python = sopt.fetch('questions', min=1, fromdate=tsInicio,
                                 todate=tsHoje, tagged=tag)
    for item in questoes_python['items']:
        resultado.append("""
        Titulo: {}
        Link: {}
        Criacao: {}
        """.format(html.unescape(item['title']), item['link'], item['creation_date']))
    return resultado
import time

from flask import render_template
from stackapi import StackAPI

def api_query():
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 200
    # unanswered google-cloud-platform questions created in the last 72 hours
    questions = SITE.fetch('questions/no-answers', order='desc',
                           fromdate=int(time.time()) - 3600 * 72,
                           sort='creation', tagged='google-cloud-platform')
    # the first row doubles as the table header
    question_table = [
        dict(title='TITLE', date='DATE', tags='TAGS', views='PAGE VIEWS', link='LINK')
    ]
    for question in questions['items']:
        current = dict(
            title=question['title'],
            date=time.strftime('%m-%d %H:%M', time.localtime(question['creation_date'])),
            tags=', '.join(str(e) for e in question['tags'] if e != 'google-cloud-platform'),
            views=str(question['view_count']),
            link=question['link'])
        question_table.append(current)
    return render_template('query.html', questions=question_table)
from stackapi import StackAPI

def get_stored_answers(answer_id):
    site = StackAPI('stackoverflow')
    site.max_pages = 1
    # the id must go through the 'ids' parameter and the '{ids}' placeholder;
    # a bare second positional argument would be interpreted as the page number
    answers = site.fetch('answers/{ids}', ids=[answer_id])
    return answers
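# Minimal usage sketch for get_stored_answers above; the answer id is an
# arbitrary illustrative value.
result = get_stored_answers(11227902)
for answer in result['items']:
    print(answer['answer_id'], answer['score'])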
import time

from stackapi import StackAPI, StackAPIError

users = []

def getUsers():
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100
    try:
        users.append(SITE.fetch('users', sort='reputation'))
    except StackAPIError:
        # back off briefly, e.g. after a throttle violation, then retry once
        time.sleep(5)
        users.append(SITE.fetch('users', sort='reputation'))
def test_buscar_questoes():
    sopt = StackAPI("pt.stackoverflow")
    sopt.page_size = 100
    sopt.max_pages = 1
    questoes_python = sopt.fetch('questions', min=1, fromdate=1534582800,
                                 todate=1534636800, tagged='python')
    assert 1534625951 == questoes_python['items'][0]['creation_date']
import time

from stackapi import StackAPI, StackAPIError

questions = []

def getQuestions():
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 10
    SITE.page_size = 100
    try:
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    except StackAPIError:
        # wait out a possible throttle violation, then retry once
        time.sleep(5)
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    time.sleep(15)
from stackapi import StackAPI, StackAPIError

def get_tags(parameter_list, count, sitename, maxpage, pagesize, page_no):
    try:
        SITE = StackAPI(sitename)
        SITE.max_pages = maxpage
        SITE.page_size = pagesize
        return SITE.fetch('tags', page=page_no)
    except (StackAPIError, ValueError):
        # fall back to the site name stored in parameter_list
        try:
            SITE = StackAPI(parameter_list[count])
            SITE.max_pages = maxpage
            SITE.page_size = pagesize
            return SITE.fetch('tags', page=page_no)
        except StackAPIError as e:
            print(" Error URL: {}".format(e.url))
            print(" Error Code: {}".format(e.code))
            print(" Error Error: {}".format(e.error))
            print(" Error Message: {}".format(e.message))
            return 0
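# Illustrative call to get_tags above: try 'stackoverflow' first and fall
# back to the first entry of a hypothetical fallback list.
fallback_sites = ['superuser']
tags = get_tags(fallback_sites, 0, 'stackoverflow', maxpage=1, pagesize=50, page_no=1)
if tags:
    print([t['name'] for t in tags['items']])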
import time

from stackapi import StackAPI, StackAPIError

answers = []
comments = []
issue_answers = []  # answer ids, collected elsewhere in the original

def getAnswers():
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    for answer_id in issue_answers:
        try:
            answers.append(SITE.fetch('answers/{}'.format(answer_id)))
            comments.append(SITE.fetch('answers/{}/comments'.format(answer_id)))
        except StackAPIError:
            # back off briefly, then retry once
            time.sleep(5)
            answers.append(SITE.fetch('answers/{}'.format(answer_id)))
            comments.append(SITE.fetch('answers/{}/comments'.format(answer_id)))
        time.sleep(10)
from stackapi import StackAPI

def fetch_stackapi(text, tags, page_size=1, max_pages=1):
    """Performs the request against the Stack API.

    Receives:
        text: in-line search string for the query
        tags: search for posts tagged with these tags
        page_size: number of results per page to retrieve
        max_pages: number of pages to retrieve
    """
    SITE = StackAPI('stackoverflow')
    SITE.page_size = page_size
    SITE.max_pages = max_pages
    return SITE.fetch('search', intext=text, tagged=tags, sort="relevance",
                      filter="!7qBwspMQR3L7c4q7tesaRX(_gP(rj*U-.H")
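# Minimal usage sketch for fetch_stackapi above; the search text and tag are
# illustrative assumptions, and the custom filter is kept from the original.
response = fetch_stackapi('list comprehension', 'python', page_size=5, max_pages=1)
for item in response['items']:
    print(item['title'])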
from datetime import datetime

from stackapi import StackAPI

def __init__(self):
    SITE = StackAPI('stackoverflow')
    # Number of items per page
    SITE.page_size = 10
    # Number of API calls
    SITE.max_pages = 1
    # Get the current date and time as a datetime object
    self.date = datetime.now()
    # Get dates for the past week, where the first day of the week is
    # Monday (0) and the last day is Sunday (6)
    interval = self.past_week()
    # Get the top-rated android questions from the past week
    self.top = SITE.fetch('questions', fromdate=interval[0], todate=interval[1],
                          sort='votes', tagged='android')['items']
    # Get the most recent android questions
    self.new = SITE.fetch('questions', sort='creation', order='desc',
                          tagged='android')['items']
import html

from stackapi import StackAPI, StackAPIError

def get_questions(path):
    conseguiu_recuperar = False
    top_viewed_questions = None
    while conseguiu_recuperar is False:
        try:
            STACK_SITE = StackAPI('stackoverflow')
            STACK_SITE.max_pages = 1
            top_viewed_questions = STACK_SITE.fetch('questions', sort='votes', order='desc')
            conseguiu_recuperar = True
        except StackAPIError:
            # keep retrying until the fetch succeeds
            print("An error occurred")
    for question in top_viewed_questions["items"]:
        # tags_to_string is a helper defined elsewhere in the original
        question['tags'] = tags_to_string(question['tags'])
        question['title'] = html.unescape(question['title'])
    return top_viewed_questions
from calendar import monthrange
from datetime import datetime

from stackapi import StackAPI

def set_questions(year, month):
    '''Fetch a batch of questions from the Stack Exchange API and store them
    in the database (MongoDB).'''
    # QuestionDAO is the project's MongoDB access object, defined elsewhere
    my_collection = QuestionDAO()
    SITE = StackAPI('stackoverflow')
    SITE.page_size = 100
    SITE.max_pages = 15
    first_day = 1
    # monthrange returns (weekday of first day, number of days in the month)
    last_day = monthrange(year, month)[1]
    # datetime objects (rather than bare dates) so StackAPI converts them
    # to Unix timestamps
    q = SITE.fetch('questions', fromdate=datetime(year, month, first_day),
                   todate=datetime(year, month, last_day))
    for item in q["items"]:
        my_collection.add_question(item)
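# Illustrative call to set_questions above: store all questions from
# March 2019 (assumes QuestionDAO is configured to reach MongoDB).
set_questions(2019, 3)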
from datetime import datetime

from stackapi import StackAPI

def main():
    load_tags()
    SITE = StackAPI('stackoverflow', key='add your key here')
    # Based on the equivalent query on the StackExchange Data Explorer
    # (https://data.stackexchange.com/stackoverflow/query/edit/1019759),
    # the query below should return 33,343 threads for the last year
    # (8,508 having at least 2 answers) and 111,026 for the last 3 years,
    # so the API limits are set accordingly.
    SITE.page_size = 100
    SITE.max_pages = 400
    # Get questions from the last year; run on March 29, 2019.
    # Filter created here: https://api.stackexchange.com/docs/questions
    questions = SITE.fetch('questions', fromdate=datetime(2018, 3, 29),
                           todate=datetime(2019, 3, 29), min=0, sort='votes',
                           tagged='json', filter='!-*jbN-o8P3E5')
    init_corenlp()
    interesting_sentences = find_interesting_sentences(questions)
    for interesting_sentence in interesting_sentences:
        if isinstance(interesting_sentence, ConditionalSentence):
            interesting_sentence.print('|')
        else:
            interesting_sentence.print("WordPatternBaseline", '|')
import time

from stackapi import StackAPI, StackAPIError

def getQuestions():
    # issue_results, questions, and getRepoName are defined elsewhere
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100
    lastrepo = ""
    for issue in issue_results:
        # query once per repository, and only for issues with enough fields
        if len(issue) >= 6 and lastrepo != getRepoName(issue[0]):
            print(issue)
            try:
                questions.append(
                    SITE.fetch('search/advanced', tagged='C#',
                               title='{}'.format(getRepoName(issue[0]))))
            except StackAPIError:
                # back off briefly, then retry once
                time.sleep(5)
                questions.append(
                    SITE.fetch('search/advanced', tagged='C#',
                               title='{}'.format(getRepoName(issue[0]))))
            lastrepo = getRepoName(issue[0])
            time.sleep(15)
from bs4 import BeautifulSoup
from stackapi import StackAPI, StackAPIError

def get_answers(ques_id):
    answers_id = []
    answers_body = []
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = 10
        SITE.max_pages = 5
        for item in ques_id:
            # fetch only the single top-voted answer for each question
            answers = SITE.fetch('questions/{}/answers'.format(item), page=1,
                                 pagesize=1, order='desc', sort='votes',
                                 filter='withbody')
            answers_id.append(answers['items'][0]["answer_id"])
            answers_body.append(BeautifulSoup(answers['items'][0]["body"], "lxml").text)
    except StackAPIError as e:
        print(e.message)
    return answers_body
import json
import sys

from stackapi import StackAPI

def retrieve_questions(sitename):
    """Use the Stack Exchange API to retrieve questions.

    Can request from scratch (request_query=True) or return a previously
    cached request. Use the cached request to repeat experiments without
    overloading the API.

    :param sitename: Name of the StackExchange community
    :type sitename: string
    :return: question objects
    :rtype: list
    """
    # sitename = sitename.split("/")[-1]
    if request_query:
        SITE = StackAPI(sitename, key=params["se_key"])
        SITE.page_size = 50
        SITE.max_pages = 1000  # max questions fetched is page_size * max_pages
        # this filter includes both question and answer text
        questions = SITE.fetch("questions", filter="!-*jbN-o8P3E5", sort="votes")
        with open("{}_questions_cache.json".format(sitename), "w") as f:
            json.dump(questions, f)
    else:
        with open("{}_questions_cache.json".format(sitename), "r") as f:
            questions = json.load(f)
    print(
        "quota max", questions["quota_max"],
        "quota remaining", questions["quota_remaining"],
        "total", questions["total"],
        "page", questions["page"],
        file=sys.stderr,
    )
    print("retrieved {} questions".format(len(questions["items"])))
    return questions["items"]
import datetime
import time

from stackapi import StackAPI

def fetch_results(number_of_results, days):
    # clamp the result count, reject invalid input, and apply defaults
    if number_of_results > 20:
        number_of_results = 20
    if days < 0 or number_of_results < 0:
        return None, None
    if number_of_results == 0 or days == 0:
        number_of_results = 10
        days = 7
    today = datetime.date.today()
    week_ago = today - datetime.timedelta(days=days)
    today_sec = time.mktime(today.timetuple()) + 86400
    week_ago_sec = time.mktime(week_ago.timetuple())
    SITE = StackAPI('stackoverflow')
    SITE.page_size = number_of_results
    SITE.max_pages = 1
    questions = SITE.fetch('questions', fromdate=int(week_ago_sec),
                           todate=int(today_sec), tagged='Android',
                           sort='creation', filter='!9YdnSIN*P')
    questions = questions['items']
    questions_by_vote = SITE.fetch('questions', fromdate=int(week_ago_sec),
                                   todate=int(today_sec), tagged='Android',
                                   sort='votes', filter='!9YdnSIN*P')
    questions_by_vote = questions_by_vote['items']
    print('Done fetching.')
    return questions, questions_by_vote
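# Minimal usage sketch for fetch_results above: the ten newest and ten
# top-voted 'Android' questions from the past week.
newest, top_voted = fetch_results(10, 7)
if newest:
    for q in newest:
        print(q['title'])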
from stackapi import StackAPI
from util import merge_whitespaces

site = StackAPI('stackoverflow')
site.max_pages = 10000

def soapi_search(query, *, begindate):
    excerpts = site.fetch('/search/excerpts', q=query, order='desc',
                          sort='activity', fromdate=int(begindate.timestamp()))
    for item in excerpts['items']:
        item_type = item['item_type']
        item_id = item['%s_id' % item_type]
        yield {
            'id': '%s-%s' % (item_type, item_id),
            'text': merge_whitespaces(item['body']),
            'title': merge_whitespaces(item['title']),
            'tags': ','.join(item['tags']),
            'type': item_type,
            'last_activity_date': item['last_activity_date'],
        }
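# Illustrative use of the soapi_search generator above, searching the last
# 30 days; the query string is an arbitrary assumption.
from datetime import datetime, timedelta

for result in soapi_search('flask sqlalchemy', begindate=datetime.now() - timedelta(days=30)):
    print(result['id'], result['title'])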
import datetime
import time

from stackapi import StackAPI

r = datetime.datetime.today().weekday() - 1
# current time
now = datetime.datetime.now()
# cal_fromdate and cal_todate are helper functions defined elsewhere
tf = cal_fromdate(now, 1)  # start time
tt = cal_todate(now, 1)    # end time
fromdate = time.mktime(datetime.datetime.strptime(tf, '%Y-%m-%d %H:%M:%S').timetuple())
todate = time.mktime(datetime.datetime.strptime(tt, '%Y-%m-%d %H:%M:%S').timetuple())

SITE = StackAPI('stackoverflow')
SITE.page_size = 100
SITE.max_pages = 100
# with sort='creation', min and max are interpreted as Unix timestamps
questions = SITE.fetch('questions', min=int(fromdate), max=int(todate),
                       sort='creation', filter="!LaSRLvLhBKxW(RHyO8wrN-")

idd = []
title = []
body = []
creation_date = []
tags = []
view_count = []
up_vote_count = []
for question in questions['items']:
    idd.append(question['question_id'])
    title.append(question['title'])
    body.append(question['body_markdown'])
    creation_date.append(question['creation_date'])
    tags.append(question['tags'])
    view_count.append(question['view_count'])
    up_vote_count.append(question['up_vote_count'])
import datetime

from stackapi import StackAPI

def getData(tag):
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 80
    questions = SITE.fetch('questions', fromdate=datetime.datetime(2017, 1, 1),
                           todate=datetime.datetime(2018, 1, 1), tagged=tag,
                           sort='votes')
    return questions['items']
from stackapi import StackAPI

def config_api(num_of_queries=10):
    site = StackAPI('stackoverflow')
    # API key; the original had a stray trailing space inside the string,
    # which would invalidate the key
    site.key = 'kBC4LfDjAYFLSEFWyrDhdw(('
    site.page_size = 100
    site.max_pages = int(num_of_queries)
    return site
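# Illustrative use of config_api above: a site configured for 3 pages of
# 100 questions each.
site = config_api(num_of_queries=3)
questions = site.fetch('questions', sort='votes')
print(len(questions['items']))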
from stackapi import StackAPI  # using stackapi

si = StackAPI('stackoverflow')     # getting questions from stackoverflow
si.max_pages = 150                 # fetch up to 150 pages
si.page_size = 100                 # 100 questions per page
q = si.fetch('questions', min=10)  # use fetch to get the data

data = ""
c = 0
for quest in q['items']:  # each question is a dict in the 'items' list
    c = c + 1
    tags = []
    if 'tags' in quest:
        tags = quest['tags']
    for label in tags:
        # add the fastText-style "__label__" prefix to each tag
        data = data + ("__label__" + label.replace(" ", "-") + " ")
    # the question text itself is stored under the 'title' key
    data = data + (quest['title'] + "\n")
print(c)
print(data)

text_file = open("questions.txt", "w")  # write the data to a file
text_file.write(data)
text_file.close()
# API
from random import randint
from time import sleep

import pandas as pd
from stackapi import StackAPI

site = StackAPI('stackoverflow')
# site parameters
site.page_size = 100
site.max_pages = 20
# endpoint fetch
users = site.fetch('users')
users = pd.DataFrame(users['items'])
# more information on users and tags
tags = []
for i in range(20):
    # query top tags for each batch of 100 user ids
    tags = tags + site.fetch('/users/{ids}/top-tags',
                             ids=users['user_id'][i * 100:(i + 1) * 100])['items']
    sleep(randint(8, 12))
# dataframe from top tags and answer score
tags = pd.DataFrame(tags)
indicator = pd.get_dummies(tags['tag_name'])
indicator = indicator[indicator.columns].multiply(tags["answer_score"], axis="index")
indicator['user_id'] = tags['user_id']
indicator = indicator.groupby('user_id').sum().reset_index()
import datetime
import time

from stackapi import StackAPI, StackAPIError

def collect(self, *args, **options):
    print(options)
    min_score = self.min_score
    if options['min']:
        min_score = options['min']
    tagged = self.tags
    if options['tagged']:
        tagged = options['tagged'].split(',')
    qa_list = []
    try:
        site = StackAPI('stackoverflow', key=APP_KEY)
        if 'count' in options:
            count = options['count']
            if count < 100:
                site.page_size = count
                site.max_pages = 1
            else:
                # integer division so max_pages stays an int
                site.max_pages = count // site.page_size
        # datetime objects so StackAPI converts them to Unix timestamps
        fromdate = datetime.datetime(1970, 1, 1)
        todate = datetime.datetime.now()
        if options['fromdate']:
            fromdate = datetime.datetime.strptime(options['fromdate'], '%Y%m%d')
        if options['todate']:
            todate = datetime.datetime.strptime(options['todate'], '%Y%m%d')
        # calling fetch with various parameters -
        # http://stackapi.readthedocs.io/en/latest/user/advanced.html#calling-fetch-with-various-api-parameters
        questions = site.fetch('questions', min=min_score, tagged=tagged,
                               sort='votes', accepted='True',
                               fromdate=fromdate, todate=todate)
        while self.wait_if_throttled(questions):
            questions = site.fetch('questions', min=min_score, tagged=tagged,
                                   sort='votes', accepted='True',
                                   fromdate=fromdate, todate=todate)
        total = len(questions['items'])
        print('Collecting from %s. No of questions = %d' % (self.site_name, total))
        processed = 0
        added = 0
        skipped = 0
        for q in questions['items']:
            # stay well under 30 requests per second
            # (https://api.stackexchange.com/docs/throttle)
            time.sleep(1 / 25)
            if 'accepted_answer_id' in q.keys():
                question = q['title']
                tags = q['tags']
                aa = site.fetch('posts/{ids}', ids=[q['accepted_answer_id']])
                while self.wait_if_throttled(aa):
                    aa = site.fetch('posts/{ids}', ids=[q['accepted_answer_id']])
                answer_link = aa['items'][0]['link']
                answer = self.extract_accepted_answer_post(answer_link)
                scraped = {
                    'question': question,
                    'answer': answer,
                    'source': answer_link,
                    'tags': tags
                }
                qa_list.append(scraped)
                if self.add_qa(scraped):
                    added += 1
                else:
                    skipped += 1
            processed += 1
            self.show_progress(processed, total, added, skipped)
    except StackAPIError as e:
        print('Failed to fetch data from stack overflow: [%s]. Skipping.' % e.message)
    return qa_list
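# collect() above relies on a wait_if_throttled helper that is not shown in
# this snippet. A minimal free-function sketch (the original is a method),
# assuming it honors the 'backoff' field the Stack Exchange API includes in
# a response when the client should slow down:
import time

def wait_if_throttled(response):
    backoff = response.get('backoff', 0)
    if backoff:
        # sleep slightly longer than requested, then signal the caller to retry
        time.sleep(backoff + 1)
        return True
    return False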
#!/usr/bin/env python3
import csv

from stackapi import StackAPI

input_string = input()
tagip = input_string.strip().replace(" ", ";")

SITE = StackAPI('stackoverflow')
SITE.page_size = 50
SITE.max_pages = 5
questions = SITE.fetch('questions', tagged=tagip, sort='votes')

usertag = tagip.lower().replace(";", "_")
arrtag = usertag.split("_")
cnt = 0
dictnew = {}
listfinal = []
for k in questions["items"]:
    flgval = 1
    # proceed only if the question has an accepted answer, then check
    # that every requested tag is present
    if "accepted_answer_id" in k.keys():
        for tval in arrtag:
            if tval not in k["tags"]:
                flgval = 0
        if flgval == 1:
            dictnew["question_id"] = k["question_id"]
            dictnew["tag"] = usertag
            dictnew["link"] = k["link"]
            dictnew["tags"] = k["tags"]
            listfinal.append(dict(dictnew))
            cnt += 1
from rest_framework import status
from rest_framework.response import Response
from stackapi import StackAPI

def post(self, request):
    site = StackAPI('stackoverflow')
    query_parameters = [
        'fromdate', 'todate', 'min', 'sort', 'tag', 'page', 'page-size',
        'order', 'max'
    ]
    query_string = ''
    for key, value in request.data.items():
        if (key == 'page-size' and request.data.get('sort') == 'votes'
                and value and value != "None"):
            # set page_size to the passed value
            site.page_size = value
        elif (key == 'page' and request.data.get('votes')
                and value and value != "None"):
            # set max_pages to the passed value
            site.max_pages = value
        elif key in ('max', 'min') and value and value != "None":
            # quoted for some sorts, bare for others, mirroring the
            # original handling
            if request.data.get('sort') in ('hot', 'week', 'month'):
                query_string += f'{key}="{value}", '
            elif request.data.get('sort') in ('activity', 'creation'):
                query_string += f'{key}={value}, '
        elif key in ('order', 'sort', 'tag') and value and value != "None":
            query_string += f'{key}="{value}", '
        elif key in query_parameters and key != 'page-size' and value and value != "None":
            query_string += f'{key}={value}, '
    if query_string.endswith(', '):
        query_string = query_string[0:-2]
    # NOTE: eval() on request-derived input is dangerous; the keys are at
    # least restricted to the query_parameters whitelist above
    questions = eval(f"site.fetch('questions', {query_string})")
    existing_query = Query.objects.filter(query=query_string)
    if existing_query.exists():
        # return the cached result for an identical query
        serialized_data = self.serializer_class(existing_query, many=True)
        return Response(serialized_data.data, status=status.HTTP_200_OK)
    query = Query.objects.create(query=query_string, results=questions,
                                 user=request.user)
    serialized_data = self.serializer_class(query)
    return Response(serialized_data.data, status=status.HTTP_201_CREATED)
import typing

from bs4 import BeautifulSoup
from stackapi import StackAPI

from stackrunner._meta import config

'''
Pass this as key when making requests against the Stack Exchange API to
receive a higher request quota. This is not considered a secret, and may be
safely embedded in client-side code or distributed binaries.
'''
APP_KEY = 'i1jWtawQVUugZZgFSlTlTg(('

StackOverflowApi = StackAPI('stackoverflow', key=APP_KEY)
StackOverflowApi.page_size = 50
StackOverflowApi.max_pages = 1

def fetch_code(
        keyword: str,
        config: config.RunnerConfig) -> typing.Generator[str, None, None]:
    question_options = {
        'order': 'desc',
        'sort': 'relevance',
        'q': keyword,
        'nottagged': config.not_tags,
        'tagged': config.tags,
        'filter': '!b93xdWqUwqOO7m'
    }
    answer_options = {
        'order': 'desc',
from stackapi import StackAPI
import ndjson
import APIkey

SITE = StackAPI('stackoverflow', key=APIkey.key)
SITE.max_pages = 300
# Some high-level user (past the biggest privilege threshold)
rep = SITE.fetch('/users/1679187/reputation-history')
for field in rep:
    if field != 'items':
        print(field, rep[field])
with open('sample.json', 'w') as outfile:
    ndjson.dump(rep['items'], outfile)