Example #1
def get_top_tags(self):
    # Look up the user's top tags via the Stack Exchange API.
    site = stackapi.StackAPI('stackoverflow')
    fields = site.fetch('users/{ids}/top-tags', ids=[self.id])
    # Keep only the ten highest-ranked tags.
    top_fields = fields['items'][:10]
    for field in top_fields:
        self.top_tags += field['tag_name'] + " "
    return html2text(self.top_tags)
Example #2
def get_posts_with_top_votes(self):
    site = stackapi.StackAPI('stackoverflow', client_id="15775")
    site.max_pages = 1
    site.page_size = 10
    # Fetch the user's ten highest-voted posts, including the post body.
    fields = site.fetch('users/{ids}/posts',
                        ids=[int(self.id)],
                        site="stackoverflow",
                        sort='votes',
                        filter='withbody',
                        order="desc")['items']
    print("\nREPUTATION: %d\n" % fields[0]["owner"]["reputation"])
    for field in fields:
        print("POST TYPE: %s\nPOST CONTENT:\n%s"
              % (html2text(field['post_type']), html2text(field['body'])))
Example #3
def get_top_tags(self):
    site = stackapi.StackAPI('stackoverflow')
    fields = site.fetch('users/{ids}/top-tags', ids=[self.id])
    # Keep only the ten highest-ranked tags.
    top_fields = fields['items'][:10]
    tags = ""
    print("\nTOP TAGS:")
    for field in top_fields:
        tags += field['tag_name'] + " ; "
    print(tags + '\n')
Example #4
def get_posts_with_top_votes(self):
    # Authenticate the request with the app's Stack Exchange credentials.
    site = stackapi.StackAPI('stackoverflow',
                             client_id=stackexchange_credentials.CLIENT_ID,
                             key=stackexchange_credentials.CLIENT_KEY)
    site.max_pages = 1
    site.page_size = 10
    # Fetch the user's ten highest-voted posts, including the post body.
    fields = site.fetch('users/{ids}/posts',
                        ids=[int(self.id)],
                        site="stackoverflow",
                        sort='votes',
                        filter='withbody',
                        order="desc")['items']
    for field in fields:
        self.post_text += html2text(field['body']) + ' '
    return self.post_text
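
Examples #1 to #4 are instance methods that rely on state such as self.id, self.top_tags and self.post_text, and on a local stackexchange_credentials module holding the API keys. A minimal sketch of an enclosing class, under those assumptions (the class name and constructor below are illustrative, not part of the original code):

import stackapi
from html2text import html2text


class StackOverflowUser:
    """Hypothetical holder of the state used by the methods above."""

    def __init__(self, user_id):
        self.id = user_id    # Stack Overflow user id for the users/{ids}/... endpoints
        self.top_tags = ""   # accumulated by get_top_tags()
        self.post_text = ""  # accumulated by get_posts_with_top_votes()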
Example #5
import logging

import stackapi
from fp.fp import FreeProxy  # provided by the free-proxy package


def proxy_change():
    # proxy_country is expected to be defined at module level.
    proxy = {'https': FreeProxy(country_id=proxy_country, timeout=0.3, rand=True).get()}
    logging.info('Change proxy server: ' + str(proxy['https']))
    good_proxy_found = False
    while not good_proxy_found:
        try:
            logging.info('validate proxy server...')
            SITE = stackapi.StackAPI('stackoverflow', proxy=proxy)
            SITE.page_size = 100  # limit per API call is 100
            SITE.max_pages = 1000000  # maximum number of API calls
            good_proxy_found = True
            logging.info('This proxy server is workable: ' + str(proxy['https']))
        except Exception as e:
            logging.warning(e)
            logging.info('validation failed')
            # Pick another random free proxy and try again.
            proxy = {'https': FreeProxy(country_id=proxy_country, timeout=0.3, rand=True).get()}
            logging.info('Change proxy server: ' + str(proxy['https']))
    return SITE
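
A brief usage sketch (the endpoint and query parameters below are illustrative, not taken from the original snippet): proxy_change() returns a StackAPI client already configured with a working proxy, so it can be used for fetching right away.

# Hypothetical usage: get a client behind a validated proxy and fetch
# recent python-tagged questions through it.
SITE = proxy_change()
SITE.max_pages = 1  # keep the test call small
questions = SITE.fetch('questions', tagged='python', sort='creation', order='desc')
print('Fetched %d questions' % len(questions['items']))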
Example #6
import random

import pymongo
import stackapi

TAG = "java"
MAX_ERRORS = 3  # maximum number of consecutive errors before we stop fetching questions
PAGE_RANGE = [1, 100]  # range of pages to fetch questions from (both ends included)
PAGE_SIZE = 100  # items per page; the API maximum of 100 is recommended
MONGODB_SERVER = "localhost:27017"
DB_NAME = "stackoverflow"
COLLECTION_NAME = "java"
PROXIES = ["localhost:9051", "localhost:9052", "localhost:9053", None]  # None = direct connection

# MongoDB
mongo = pymongo.MongoClient("mongodb://"+MONGODB_SERVER)
db = mongo[DB_NAME]
collection = db[COLLECTION_NAME]

# StackAPI
stack = stackapi.StackAPI("stackoverflow")
stack.page_size = PAGE_SIZE
stack.max_pages = 1  # one page per call; the loop below walks the page range itself
min_page = min(PAGE_RANGE)
max_page = max(PAGE_RANGE)
errors = 0

for page in range(min_page, max_page+1):
    stop = False
    while True:
        try:
            print(f"Getting questions from page {page}/{max_page}")
            # Rotate through the configured proxies (None means a direct connection).
            proxy = random.choice(PROXIES)
            if proxy is not None:
                proxies = {"http": "http://"+proxy, "https": "https://"+proxy}
            else:
                proxies = None
Example #7
from chatbot import Chatbot, log

import datetime
import html
import re
import requests
import time

import stackapi

ROOMS = [240, 123403, 120733]
DEBUG = False

# API client for the Code Golf meta site.
CGCC = stackapi.StackAPI('codegolf.meta', key='0lYaLshi5yEGuEcK3ZxYHA((')
# Matches links to answers on the "Sandbox for Proposed Challenges" post
# (question 2140) in the HTML of a search-results page.
HTML_search = re.compile(
    r'<a href="/questions/2140/sandbox-for-proposed-challenges/(\d+)\?r=SearchResults#\1"'
)
TITLE1_search = re.compile(r'<h1.*?> *(.*?) *</h1>')
TITLE2_search = re.compile(r'<h2.*?> *(.*?) *</h2>')
EMPTY_LINK = '[{}](https://codegolf.meta.stackexchange.com/a/{})'

# Search for sandbox answers that were last active / created on a given date
# and have a non-negative score.
SEARCH_URLS = [
    'https://codegolf.meta.stackexchange.com/search?q=inquestion%3A2140+lastactive%3A{}+score%3A0..+',
    'https://codegolf.meta.stackexchange.com/search?q=inquestion%3A2140+created%3A{}+score%3A0..+'
]

# Map HTML formatting tags to their Markdown equivalents.
HTML_REPLACES = {'strong': '**', 'em': '*', 'code': '`'}

POST_TIME = datetime.time(
    1,  # hour
    0,  # minute