def get_stored_questions(API_name,
                         Start_date=datetime(2010, 1, 1),
                         End_date=datetime(2018, 12, 18)):
    """Return a list of question summaries for questions tagged *API_name*.

    :param API_name: tag to search for on StackOverflow
    :param Start_date: earliest creation date to include
    :param End_date: latest creation date to include
    :return: list of dicts with title/score/is_answered/tags/question_id/link
    """
    site = StackAPI('stackoverflow')
    site.max_pages = 1  # a single API page is enough for this summary

    # BUG FIX: the date parameters were previously ignored (hard-coded
    # datetimes were passed to fetch) and the defaults used an undefined
    # `Date` constructor; both now flow through to the API call.
    questions = site.fetch('questions',
                           fromdate=Start_date,
                           todate=End_date,
                           tagged=API_name)

    # keep only the relevant key/value pairs from each question item
    stored_quests = []
    for item in questions['items']:
        stored_quests.append({
            "title": item.get("title"),
            "score": item.get("score"),
            "is_answered": item.get("is_answered"),
            "tags": item.get("tags"),
            "question_id": item.get("question_id"),
            "link": item.get("link")
        })

    return stored_quests
Example #2
0
def identify_questions(tags):
    """Find StackOverflow questions relevant to *tags*.

    The input tags are stemmed in place (callers may rely on this), joined
    into the ';'-separated query string the /search endpoint expects, and
    each result is scored by tag overlap with the request.

    :param tags: list of tag strings; mutated to their stemmed forms
    :return: (question ids, scores, titles) for sufficiently matching questions
    """
    ques_id = []
    ques_score = []
    ques_body = []
    # stem the requested tags in place, then build the query string
    for i in range(len(tags)):
        tags[i] = ps.stem(tags[i])
    query = ';'.join(tags)
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = page_size
        SITE.max_pages = max_pages
        questions = SITE.fetch('search', tagged=query, sort='relevance')
        for item in questions[u'items']:
            tags_ques = item[u'tags']
            # stem the question's own tags so they compare against our stems
            for i in range(len(tags_ques)):
                tags_ques[i] = ps.stem(tags_ques[i])
            # count question tags that were NOT requested...
            cnt = sum(1 for tag in tags_ques if tag not in tags)
            # ...and subtract the number of requested tags that matched
            # (temp - len(tags) == -len(intersection))
            temp = len(tags) - len(list(set(tags).intersection(tags_ques)))
            cnt = cnt + (temp - len(tags))
            if cnt < 0:
                ques_id.append(item[u'question_id'])
                ques_score.append(cnt)
                ques_body.append(item[u'title'])

    except StackAPIError as e:
        # FIX: converted Python 2 print statements to function calls
        print(e.message)
    # debug output; FIX: guarded so a short result list cannot IndexError
    if len(ques_id) > 1:
        print(ques_id[1])
    return ques_id, ques_score, ques_body
Example #3
0
def buscar_questoes(tag="python"):
    """Fetch recent pt.stackoverflow questions for *tag* and format them.

    Uses the module-level tsInicio/tsHoje timestamps as the time window
    (currently one day).

    :param tag: tag to filter questions by
    :return: list of formatted strings (title, link, creation date)
    """
    # pt.stackoverflow client
    sopt = StackAPI("pt.stackoverflow")

    # result-count configuration
    sopt.page_size = 100
    sopt.max_pages = 1
    resultado = []
    # search questions by tag within the time window
    questoes_python = sopt.fetch('questions',
                                 min=1,
                                 fromdate=tsInicio,
                                 todate=tsHoje,
                                 tagged=tag)

    # FIX: iterate the items directly instead of indexing by position
    for questao in questoes_python['items']:
        resultado.append("""
        Titulo: {}
        Link: {}
        Criacao: {}
        """.format(html.unescape(questao['title']),
                   questao['link'],
                   questao['creation_date']))
    return resultado
Example #4
0
def api_query():
    """Render questions with no answers from the last 72 hours that are
    tagged google-cloud-platform."""
    site = StackAPI('stackoverflow')
    site.max_pages = 200
    fetched = site.fetch('questions/no-answers',
                         order='desc',
                         fromdate=int(time.time()) - 3600 * 72,
                         sort='creation',
                         tagged='google-cloud-platform')
    # Header row first, then one row per question.
    question_table = [
        dict(title='TITLE',
             date='DATE',
             tags='TAGS',
             views='PAGE VIEWS',
             link='LINK')
    ]
    print(question_table)
    for q in fetched['items']:
        # Human-readable local creation time.
        fmt_date = time.strftime('%m-%d %H:%M',
                                 time.localtime(q['creation_date']))
        # All tags except the one we searched by.
        other_tags = ', '.join(str(t) for t in q['tags']
                               if t != 'google-cloud-platform')
        question_table.append(dict(title=q['title'],
                                   date=fmt_date,
                                   tags=other_tags,
                                   views=str(q['view_count']),
                                   link=q['link']))

    return render_template('query.html', questions=question_table)
def get_stored_answers(id):
    """Fetch the answer object(s) for the given answer id.

    :param id: a StackOverflow answer id
    :return: the raw API response dict
    """
    site = StackAPI('stackoverflow')

    site.max_pages = 1

    # BUG FIX: `id` was passed positionally, which StackAPI.fetch interprets
    # as its `page` argument; pass it via the `ids` vector so the request
    # hits the /answers/{ids} endpoint as intended.
    answers = site.fetch('answers', ids=[id])

    return answers
Example #6
0
def getUsers():
    """Fetch one page of top-reputation users into the global `users` list."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100
    try:
        users.append(SITE.fetch('users', sort='reputation'))
    except Exception:
        # FIX: narrowed the bare `except:` (it also swallowed SystemExit and
        # KeyboardInterrupt); back off briefly and retry once.
        time.sleep(5)
        users.append(SITE.fetch('users', sort='reputation'))
Example #7
0
def test_buscar_questoes():
    """Regression check: the first question in a fixed time window has a
    known creation date."""
    api = StackAPI("pt.stackoverflow")
    api.page_size = 100
    api.max_pages = 1
    resposta = api.fetch('questions',
                         min=1,
                         fromdate=1534582800,
                         todate=1534636800,
                         tagged='python')
    primeira = resposta['items'][0]
    assert primeira['creation_date'] == 1534625951
Example #8
0
def getQuestions():
    """Fetch top-voted C# questions into the global `questions` list."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 10
    SITE.page_size = 100
    try:
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    except Exception:
        # FIX: narrowed the bare `except:`; wait out a transient failure
        # and retry once.
        time.sleep(5)
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    time.sleep(15)  # stay well under the API throttle
Example #9
0
def get_tags(parameter_list, count, sitename, maxpage, pagesize, page_no):
    """Fetch one page of tags from *sitename*, falling back to
    parameter_list[count] when the primary site fails.

    :param parameter_list: fallback site names
    :param count: index of the fallback site to use
    :param sitename: primary StackExchange site name
    :param maxpage: max pages to request
    :param pagesize: items per page
    :param page_no: which page to fetch
    :return: the API response dict, or 0 when both attempts fail
    """
    try:
        SITE = StackAPI(sitename)
        SITE.max_pages = maxpage
        SITE.page_size = pagesize
        return SITE.fetch('tags', page=page_no)
    except Exception:
        # FIX: the outer handler was a bare `except:` which also trapped
        # SystemExit/KeyboardInterrupt; narrowed to Exception (the StackAPI
        # constructor can raise ValueError in addition to StackAPIError).
        try:
            SITE = StackAPI(parameter_list[count])
            SITE.max_pages = maxpage
            SITE.page_size = pagesize
            return SITE.fetch('tags', page=page_no)
        except stackapi.StackAPIError as e:
            print(" Error URL: {}".format(e.url))
            print(" Error Code: {}".format(e.code))
            print(" Error Error: {}".format(e.error))
            print(" Error Message: {}".format(e.message))

            return 0
Example #10
0
def getAnswers():
    """Fetch answers and their comments for each id in the global
    `issue_answers`, appending to the global `answers`/`comments` lists."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    # FIX: loop variable renamed so it no longer shadows the builtin `id`
    for answer_id in issue_answers:
        try:
            answers.append(SITE.fetch('answers/{}'.format(answer_id)))
            comments.append(SITE.fetch('answers/{}/comments'.format(answer_id)))
        except Exception:
            # FIX: narrowed the bare `except:`; back off and retry once
            time.sleep(5)
            answers.append(SITE.fetch('answers/{}'.format(answer_id)))
            comments.append(SITE.fetch('answers/{}/comments'.format(answer_id)))
        time.sleep(10)  # throttle between ids
def fetch_stackapi(text, tags, page_size=1, max_pages=1):
    """Run a StackOverflow /search query.

    :param text: free-text (intext) search string
    :param tags: restrict results to posts carrying these tags
    :param page_size: number of results per page
    :param max_pages: number of pages to request
    :return: raw API response dict
    """
    api = StackAPI('stackoverflow')
    api.page_size = page_size
    api.max_pages = max_pages
    # Custom filter selects the fields needed downstream.
    query = dict(intext=text,
                 tagged=tags,
                 sort="relevance",
                 filter="!7qBwspMQR3L7c4q7tesaRX(_gP(rj*U-.H")
    return api.fetch('search', **query)
Example #12
0
	def __init__(self):
		"""Fetch this week's top-rated and the most recent android questions."""
		api = StackAPI('stackoverflow')
		# Items per page.
		api.page_size = 10
		# Number of API calls per fetch.
		api.max_pages = 1
		# Current date/time for downstream formatting.
		self.date = datetime.now()
		# (start, end) bounds of the past week, where Monday is 0 and
		# Sunday is 6.
		interval = self.past_week()
		# Top-rated android questions from the past week.
		self.top = api.fetch('questions', fromdate=interval[0],
			todate=interval[1], sort='votes', tagged='android')['items']
		# Most recently created android questions.
		self.new = api.fetch('questions', sort='creation',
			order='desc', tagged='android')['items']
def get_questions(path):
    """Fetch top-voted questions, retrying until the API call succeeds.

    NOTE(review): *path* is accepted but unused in the visible code; kept
    for caller compatibility.

    :return: the API response with tags flattened to strings and titles
             HTML-unescaped
    """
    top_viewed_questions = None
    while top_viewed_questions is None:
        try:
            STACK_SITE = StackAPI('stackoverflow')
            STACK_SITE.max_pages = 1
            top_viewed_questions = STACK_SITE.fetch('questions',
                                                    sort='votes',
                                                    order='desc')
        except Exception:
            # FIX: narrowed the bare `except:` — it trapped
            # KeyboardInterrupt, which made this retry loop impossible
            # to interrupt from the keyboard.
            print("Ocorreu um erro")

    for question in top_viewed_questions["items"]:
        question['tags'] = tags_to_string(question['tags'])
        question['title'] = html.unescape(question['title'])

    return top_viewed_questions
Example #14
0
def set_questions(year, month):
    ''' Fetch one calendar month of questions from StackAPI and persist
    each of them to MongoDB through QuestionDAO.
    '''
    dao = QuestionDAO()

    api = StackAPI('stackoverflow')
    api.page_size = 100
    api.max_pages = 15

    # Calendar bounds of the requested month.
    last_day = monthrange(year, month)[1]

    response = api.fetch('questions',
                         fromdate=date(year, month, 1),
                         todate=date(year, month, last_day))

    for question in response["items"]:
        dao.add_question(question)
Example #15
0
def main():
	"""Collect voted 'json' questions from StackOverflow and print the
	interesting sentences found in them."""
	load_tags()
	# NOTE: the API key is a placeholder; supply a real one before running.
	api = StackAPI('stackoverflow', key='add your key here')
	# Limits sized from the equivalent StackExchange DataExplorer query
	# (https://data.stackexchange.com/stackoverflow/query/edit/1019759):
	# ~33343 threads for the last year (8508 with >= 2 answers),
	# ~111,026 for the last 3 years.
	api.page_size = 100
	api.max_pages = 400
	# Questions from the last year (run on March 29, 2019); custom filter
	# built at https://api.stackexchange.com/docs/questions
	questions = api.fetch('questions', fromdate=datetime(2018, 3, 29),
		todate=datetime(2019, 3, 29), min=0, sort='votes',
		tagged='json', filter='!-*jbN-o8P3E5')
	init_corenlp()
	for sentence in find_interesting_sentences(questions):
		if isinstance(sentence, ConditionalSentence):
			sentence.print('|')
		else:
			sentence.print("WordPatternBaseline", '|')
def getQuestions():
    """For each sufficiently-commented issue in the global `issue_results`,
    search StackOverflow for C# questions whose title matches the issue's
    repository name, appending results to the global `questions` list."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100
    lastrepo = ""
    for issue in issue_results:
        # skip short issues and repos identical to the previous query
        if (len(issue) >= 6 and lastrepo != getRepoName(issue[0])):
            print(issue)
            try:
                questions.append(
                    SITE.fetch('search/advanced',
                               tagged='C#',
                               title='{}'.format(getRepoName(issue[0]))))
            except Exception:
                # FIX: narrowed the bare `except:`; back off and retry once
                time.sleep(5)
                questions.append(
                    SITE.fetch('search/advanced',
                               tagged='C#',
                               title='{}'.format(getRepoName(issue[0]))))
            lastrepo = getRepoName(issue[0])
            time.sleep(15)  # stay under the API throttle
Example #17
0
def get_answers(ques_id):
    """Fetch the top-voted answer body for each question id.

    :param ques_id: iterable of question ids
    :return: list of plain-text answer bodies (HTML stripped via
             BeautifulSoup)
    """
    answers_id = []
    answers_body = []
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = 10
        SITE.max_pages = 5
        for item in ques_id:
            # one answer per question: top-voted, with body included
            answers = SITE.fetch('questions/{}/answers'.format(item),
                                 page=1,
                                 pagesize=1,
                                 order='desc',
                                 sort='votes',
                                 filter='withbody')
            answers_id.append(answers[u'items'][0][u"answer_id"])
            answers_body.append(
                BeautifulSoup(answers[u'items'][0][u"body"], "lxml").text)

    except StackAPIError as e:
        # FIX: converted the Python 2 print statement to a function call
        print(e.message)
    return answers_body
Example #18
0
def retrieve_questions(sitename):
    """Retrieve question objects for a StackExchange community.

    When the module-level ``request_query`` flag is set, questions are
    fetched from the API and cached to ``{sitename}_questions_cache.json``;
    otherwise the cached file is loaded, so experiments can be repeated
    without overloading the API.

    :param sitename: Name of StackExchange community
    :type sitename: string
    :return: question objects
    :rtype: list
    """
    cache_path = "{}_questions_cache.json".format(sitename)
    if request_query:
        api = StackAPI(sitename, key=params["se_key"])
        api.page_size = 50
        api.max_pages = 1000  # max qs should be page_size * max_pages
        questions = api.fetch("questions",
                              filter="!-*jbN-o8P3E5",
                              sort="votes")  # has q and a text
        with open(cache_path, "w") as f:
            json.dump(questions, f)
    else:
        with open(cache_path, "r") as f:
            questions = json.load(f)
    # Report quota usage on stderr so stdout stays clean.
    print(
        "quota max",
        questions["quota_max"],
        "quota remaining",
        questions["quota_remaining"],
        "total",
        questions["total"],
        "page",
        questions["page"],
        file=sys.stderr,
    )
    print("retrieved {} questions".format(len(questions["items"])))
    return questions["items"]
Example #19
0
def fetch_results(number_of_results, days):
    """Fetch recent Android questions, ordered by creation date and by votes.

    The result count is capped at 20; negative arguments yield
    ``(None, None)``; zero for either argument falls back to the defaults
    of 10 results over 7 days.

    :return: (questions by creation date, questions by votes)
    """
    # Normalise and validate the requested window.
    number_of_results = min(number_of_results, 20)
    if days < 0 or number_of_results < 0:
        return None, None
    if number_of_results == 0 or days == 0:
        number_of_results, days = 10, 7

    today = datetime.date.today()
    week_ago = today - datetime.timedelta(days=days)
    # +86400 so the whole of today is inside the window.
    today_sec = time.mktime(today.timetuple()) + 86400
    week_ago_sec = time.mktime(week_ago.timetuple())

    api = StackAPI('stackoverflow')
    api.page_size = number_of_results
    api.max_pages = 1
    by_time = api.fetch('questions', fromdate=int(week_ago_sec),
                        todate=int(today_sec), tagged='Android',
                        sort='creation', filter='!9YdnSIN*P')['items']
    by_vote = api.fetch('questions', fromdate=int(week_ago_sec),
                        todate=int(today_sec), tagged='Android',
                        sort='votes', filter='!9YdnSIN*P')['items']

    print('Done fetching.')
    return by_time, by_vote
from stackapi import StackAPI

from util import merge_whitespaces

# Module-level StackOverflow client shared by the search helper below.
site = StackAPI('stackoverflow')
site.max_pages = 10000  # effectively unbounded paging for broad searches


def soapi_search(query, *, begindate):
    excerpts = site.fetch('/search/excerpts',
                          q=query,
                          order='desc',
                          sort='activity',
                          fromdate=int(begindate.timestamp()))

    for item in excerpts['items']:
        item_type = item['item_type']
        item_id = item['%s_id' % item_type]

        yield {
            'id':
            '%s-%s' % (item_type, item_id),
            'text':
            merge_whitespaces(item['body']),
            'title':
            merge_whitespaces(item['title']),
            'tags':
            ','.join(item['tags']),
            'type':
            item_type,
            'last_activity_date':
Example #21
0
# NOTE(review): r (= weekday index minus one) appears unused in the
# visible code — verify against the full file.
r = datetime.datetime.today().weekday()-1

# Current time
now = datetime.datetime.now()
tf = cal_fromdate(now, 1)  # window start time
tt = cal_todate(now, 1)  # window end time

# Convert the formatted timestamp strings to epoch seconds for the API.
fromdate = time.mktime(datetime.datetime.strptime(
    tf, '%Y-%m-%d %H:%M:%S').timetuple())

todate = time.mktime(datetime.datetime.strptime(
    tt, '%Y-%m-%d %H:%M:%S').timetuple())

SITE = StackAPI('stackoverflow')
SITE.page_size = 100
SITE.max_pages = 100
# min/max serve as the creation-date bounds under sort='creation';
# the custom filter includes body_markdown in the response.
questions = SITE.fetch('questions', min=int(fromdate), max=int(todate), sort='creation',
                       filter="!LaSRLvLhBKxW(RHyO8wrN-")

# Per-question accumulators (columns of the dataset being built).
idd = []
title = []
body = []
creation_date = []
tags = []
view_count = []
up_vote_count = []

# NOTE(review): only three of the seven lists are filled in the visible
# code — this script appears truncated here.
for question in questions['items']:
    idd.append(question['question_id'])
    title.append(question['title'])
    body.append(question['body_markdown'])
Example #22
0
def getData(tag):
    """Return top-voted questions created during 2017 that carry *tag*."""
    api = StackAPI('stackoverflow')
    api.max_pages = 80
    result = api.fetch('questions',
                       fromdate=datetime.datetime(2017, 1, 1),
                       todate=datetime.datetime(2018, 1, 1),
                       tagged=tag,
                       sort='votes')
    return result['items']
Example #23
0
def config_api(num_of_queries=10):
    """Build a configured StackOverflow client.

    :param num_of_queries: number of pages (of 100 items each) to request
    :return: the configured StackAPI instance
    """
    api = StackAPI('stackoverflow')
    # NOTE(review): trailing space inside the key string reproduced as-is.
    api.key = 'kBC4LfDjAYFLSEFWyrDhdw(( '
    api.page_size = 100
    api.max_pages = int(num_of_queries)
    return api
from stackapi import StackAPI  #using stackapi

# Pull question titles and their tags from StackOverflow and write them out
# in fastText format: "__label__tag1 __label__tag2 ... question title".
si = StackAPI('stackoverflow')  #getting questions from stackoverflow website
si.max_pages = 150  #getting questions from 150 pages
si.page_size = 100  #getting 100 observations from each page
q = si.fetch('questions', min=10)  #using fetch function to get the data

parts = []
c = 0
for quest in q['items']:  #tags are stored under items column
    c = c + 1
    # one "__label__<tag>" prefix per tag (spaces normalised to dashes)
    for label in quest.get('tags', []):
        parts.append("__label__" + label.replace(" ", "-") + " ")
    parts.append(quest['title'] + "\n")  #questions are stored in the title column
# FIX: build the output with a single join instead of quadratic `+=`
data = "".join(parts)
print(c)
print(data)
# FIX: context manager guarantees the file is closed even on error
with open("questions.txt", "w") as text_file:
    text_file.write(data)
Example #25
0
# # API

from stackapi import StackAPI
from time import sleep
from random import randint

# Shared StackOverflow client for the user/tag collection below.
site = StackAPI('stackoverflow')

# site parameters
site.page_size = 100
site.max_pages = 20

# endpoint fetch: top users (20 pages x 100 = 2000 user records)
# NOTE(review): `pd` (pandas) must be imported elsewhere in the file.
users = site.fetch('users')
users = pd.DataFrame(dict(users.items())['items'])

# more information on users and tags: fetch each user's top tags in
# batches of 100 ids, sleeping between calls to respect the API throttle
tags = []
for i in range(20):
    tags = tags + site.fetch(
        '/users/{ids}/top-tags',
        ids=users['user_id'][i * 100:(i + 1) * 100])['items']
    sleep(randint(8, 12))

# dataframe from top tags and answer score: one indicator column per tag
# name, weighted by the user's answer score, then summed per user
tags = pd.DataFrame(tags)
indicator = pd.get_dummies(tags['tag_name'])
indicator = indicator[indicator.columns].multiply(tags["answer_score"],
                                                  axis="index")
indicator['user_id'] = tags['user_id']
indicator = indicator.groupby('user_id').sum().reset_index()
Example #26
0
    def collect(self, *args, **options):
        """Collect accepted question/answer pairs from StackOverflow.

        Options: ``min`` (minimum score), ``tagged`` (comma-separated tag
        list), ``count`` (approximate number of questions), ``fromdate`` /
        ``todate`` (YYYYMMDD strings).  Each pair is persisted via
        ``self.add_qa``; returns the list of scraped
        ``{question, answer, source, tags}`` dicts.
        """
        print(options)
        # FIX: renamed local `min` so it no longer shadows the builtin
        min_score = self.min_score
        if options['min']:
            min_score = options['min']
        tagged = self.tags
        if options['tagged']:
            tagged = options['tagged'].split(',')
        qa_list = []
        try:
            site = StackAPI('stackoverflow', key=APP_KEY)
            # site = StackAPI(self.site_name)
            if 'count' in options:
                count = options['count']
                if count < 100:
                    site.page_size = count
                    site.max_pages = 1
                else:
                    # FIX: ceiling integer division — plain `/` yields a
                    # fractional page count under Python 3 and could drop
                    # the final partial page.
                    site.max_pages = -(-count // site.page_size)

            # FIX: removed the unused `epoch_str` local
            fromdate = time.strptime("19700101", '%Y%m%d')
            todate = datetime.datetime.now()
            if options['fromdate']:
                fromdate = time.strptime(options['fromdate'], '%Y%m%d')
            if options['todate']:
                todate = time.strptime(options['todate'], '%Y%m%d')
            # calling fetch with various parameters - http://stackapi.readthedocs.io/en/latest/user/advanced.html#calling-fetch-with-various-api-parameters
            questions = site.fetch('questions',
                                   min=min_score,
                                   tagged=tagged,
                                   sort='votes',
                                   accepted='True',
                                   fromdate=fromdate,
                                   todate=todate)
            # refetch while the API reports a backoff/throttle condition
            while (self.wait_if_throttled(questions)):
                questions = site.fetch('questions',
                                       min=min_score,
                                       tagged=tagged,
                                       sort='votes',
                                       accepted='True',
                                       fromdate=fromdate,
                                       todate=todate)
            total = len(questions['items'])
            print('Collecting from %s. No of questions = %d' %
                  (self.site_name, total))
            processed = 0
            added = 0
            skipped = 0
            for q in questions['items']:
                time.sleep(
                    1 / 25
                )  # this is to ensure less than 30 req per second (https://api.stackexchange.com/docs/throttle)
                if 'accepted_answer_id' in q.keys():
                    question = q['title']
                    tags = q['tags']
                    aa = site.fetch('posts', ids=[q['accepted_answer_id']])
                    while (self.wait_if_throttled(aa)):
                        aa = site.fetch('posts', ids=[q['accepted_answer_id']])
                    answer_link = aa['items'][0]['link']
                    answer = self.extract_accepted_answer_post(answer_link)
                    scraped = {
                        'question': question,
                        'answer': answer,
                        'source': answer_link,
                        'tags': tags
                    }
                    qa_list.append(scraped)
                    if self.add_qa(scraped):
                        added += 1
                    else:
                        skipped += 1
                processed += 1
                self.show_progress(processed, total, added, skipped)
        except StackAPIError as e:
            print('Failed to fetch data from stack overflow: [%s]. Skipping.' %
                  e.message)
        return qa_list
Example #27
0
#!/usr/bin/env python3

from stackapi import StackAPI
import csv

# Read space-separated tags from stdin and convert them to the
# semicolon-separated form the API's `tagged` parameter expects.
input_string = input()
tagip = input_string.strip().replace(" ", ";")

SITE = StackAPI('stackoverflow')
SITE.page_size = 50
SITE.max_pages = 5
questions = SITE.fetch('questions', tagged=tagip, sort='votes')

# Normalised tag key, e.g. "Python;Pandas" -> "python_pandas".
usertag = tagip.lower().replace(";", "_")
arrtag = usertag.split("_")

cnt = 0
dictnew = {}
listfinal = []
for k in questions["items"]:
    flgval = 1
    if "accepted_answer_id" in k.keys(
    ):  #if k.keys has accepted_answer_id then proceed, also check if all tags are present
        for tval in arrtag:
            if tval not in k["tags"]:
                flgval = 0
        if flgval == 1:
            # NOTE(review): `dictnew` is reused across iterations and is
            # never appended to `listfinal` in the visible code, so later
            # matches overwrite earlier ones — verify against the full file
            # (this script appears truncated here).
            dictnew["question_id"] = k["question_id"]
            dictnew["tag"] = usertag
            dictnew["link"] = k["link"]
            dictnew["tags"] = k["tags"]
    def post(self, request):
        """Build and run a StackOverflow /questions query from the request
        body, caching results in the Query model.

        Recognised keys are whitelisted in `query_parameters`; values are
        concatenated into a literal kwargs string which is then eval()'d
        into a `site.fetch` call.
        """
        site = StackAPI('stackoverflow')
        # Whitelist of keys accepted from the request body.
        query_parameters = [
            'fromdate', 'todate', 'min', 'sort', 'tag', 'page', 'page-size',
            'order', 'max'
        ]
        query_string = f''
        print(request.data)

        for key, value in request.data.items():
            if key == 'page-size' and request.data.get(
                    'sort') == 'votes' and value and value != "None":
                # set page_size to passed value
                print('passing')
                site.page_size = value
                pass
            elif key == 'page' and request.data.get(
                    'votes') and value and value != "None":
                # set max_pages to passed value
                print('p2')
                site.max_pages = value
                pass
            # NOTE(review): `and` binds tighter than `or`, so this branch is
            # taken for key == 'max' regardless of `value` — confirm whether
            # the extra value checks were intended to apply to both keys.
            elif key == 'max' or key == 'min' and value and value != "None":
                if request.data.get('sort') == 'hot' \
                    or request.data.get('sort') == 'week' or request.data.get('sort') == 'month':
                    if value:
                        # quoted value for the string-valued sorts
                        query_string += f'{key}="{value}", '
                elif request.data.get(
                        'sort') == 'activity' or request.data.get(
                            'sort') == 'creation':
                    if value:
                        # unquoted (numeric) value for date-based sorts
                        query_string += f'{key}={value}, '
            # NOTE(review): same precedence hazard — `key == 'order'` and
            # `key == 'sort'` match irrespective of `value`.
            elif key == 'order' or key == 'sort' or key == 'tag' and value and value != "None":
                if key == 'page-size':
                    pass
                else:
                    if value:
                        query_string += f'{key}="{value}", '
            elif key in query_parameters and value and value != "None":
                if key == 'page-size':
                    pass
                else:
                    if value:
                        query_string += f'{key}={value}, '

        # Drop the trailing ", " separator.
        if query_string.endswith(', '):
            query_string = query_string[0:-2]

        print(f"site.fetch('questions', {query_string})")
        # SECURITY(review): eval() on a string assembled from request data
        # can execute arbitrary expressions if any value escapes the checks
        # above — building a kwargs dict and calling site.fetch(**kwargs)
        # would be safe; flagged for follow-up.
        questions = eval(f"site.fetch('questions', {query_string})")
        # Cache lookup keyed on the exact kwargs string.
        existing_query = Query.objects.filter(query=query_string)
        print(existing_query)

        if existing_query.exists():
            print("cached")
            serialized_data = self.serializer_class(existing_query, many=True)
            return Response(serialized_data.data, status=status.HTTP_200_OK)
        else:
            query = Query.objects.create(query=query_string,
                                         results=questions,
                                         user=request.user)
            serialized_data = self.serializer_class(query)
            return Response(serialized_data.data,
                            status=status.HTTP_201_CREATED)
import typing

from bs4 import BeautifulSoup
from stackapi import StackAPI

from stackrunner._meta import config
'''
Pass this as key when making requests against the Stack Exchange API to receive a higher request quota.

This is not considered a secret, and may be safely embed in client side code or distributed binaries.
'''
# Request key for a higher Stack Exchange API quota (not a secret; safe to
# embed client-side, per the note above).
APP_KEY = 'i1jWtawQVUugZZgFSlTlTg(('

# Shared client used by fetch_code below: small single-page requests.
StackOverflowApi = StackAPI('stackoverflow', key=APP_KEY)
StackOverflowApi.page_size = 50
StackOverflowApi.max_pages = 1


def fetch_code(
        keyword: str,
        config: config.RunnerConfig) -> typing.Generator[str, None, None]:
    question_options = {
        'order': 'desc',
        'sort': 'relevance',
        'q': keyword,
        'nottagged': config.not_tags,
        'tagged': config.tags,
        'filter': '!b93xdWqUwqOO7m'
    }
    answer_options = {
        'order': 'desc',
Example #30
0
from stackapi import StackAPI
import ndjson
import APIkey

# Dump the reputation history of a specific high-reputation user
# (someone past the biggest privilege threshold) to sample.json.
SITE = StackAPI('stackoverflow', key=APIkey.key)
SITE.max_pages = 300

rep = SITE.fetch('/users/1679187/reputation-history'
                 )  # Some high-level user (past biggest privilege threshold)

# FIX: converted the Python 2 print statement to a function call so this
# script runs under Python 3 like the rest of the codebase.
for field in rep:
    if field != 'items':
        print(field, rep[field])

# One JSON object per line (ndjson) for each history entry.
with open('sample.json', 'w') as outfile:
    ndjson.dump(rep['items'], outfile)