def get_top_tags(self):
    """Fetch this user's top 10 Stack Overflow tags and append them to self.top_tags.

    Returns:
        The accumulated self.top_tags string passed through html2text.

    NOTE(review): each call appends to self.top_tags rather than replacing it —
    repeated calls accumulate duplicates; confirm callers rely on that.
    """
    site = stackapi.StackAPI('stackoverflow')
    fields = site.fetch('users/{ids}/top-tags', ids=[self.id])
    top_fields = fields['items'][:10]
    # Build the chunk in one pass with join instead of quadratic += inside a
    # loop; the original's list() copy of an already-sliced list was needless.
    self.top_tags += "".join(field['tag_name'] + " " for field in top_fields)
    return html2text(self.top_tags)
def get_posts_with_top_votes(self):
    """Print this user's 10 highest-voted Stack Overflow posts.

    Prints the owner's reputation once, then each post's type and its body
    converted from HTML to plain text. Prints nothing for a user with no
    posts (the original raised IndexError on `fields[0]` in that case).
    """
    site = stackapi.StackAPI('stackoverflow', client_id="15775")
    site.max_pages = 1   # a single API page...
    site.page_size = 10  # ...of at most 10 posts
    fields = site.fetch('users/{ids}/posts', ids=[int(self.id)],
                        site="stackoverflow", sort='votes',
                        filter='withbody',  # 'withbody' makes the API include post bodies
                        order="desc")['items']
    # Guard: an empty result would make fields[0] raise IndexError.
    if not fields:
        return
    print("\nREPUTATION: %d \n" % fields[0]["owner"]["reputation"])
    for field in fields:
        print("POST TYPE: %sPOST CONTENT:\n%s" % (html2text(field['post_type']),
                                                  html2text(field['body'])))
def get_top_tags(self):
    """Fetch and print this user's top 10 Stack Overflow tags, ' ; '-separated."""
    site = stackapi.StackAPI('stackoverflow')
    fields = site.fetch('users/{ids}/top-tags', ids=[self.id])
    top_fields = fields['items'][:10]
    print("\nTOP TAGS:")
    # The original shadowed the builtin `str` as its accumulator and used
    # quadratic += in a loop; join produces the identical output (including
    # the trailing " ; ") in a single pass.
    tag_line = "".join(field['tag_name'] + " ; " for field in top_fields)
    print(tag_line + '\n')
def get_posts_with_top_votes(self):
    """Fetch this user's 10 highest-voted posts and append their plain-text
    bodies to self.post_text.

    Credentials come from the stackexchange_credentials module imported
    elsewhere in this file.

    Returns:
        The accumulated self.post_text string.

    NOTE(review): each call appends to self.post_text — confirm callers
    expect accumulation rather than replacement.
    """
    site = stackapi.StackAPI('stackoverflow',
                             client_id=stackexchange_credentials.CLIENT_ID,
                             key=stackexchange_credentials.CLIENT_KEY)
    site.max_pages = 1   # a single API page...
    site.page_size = 10  # ...of at most 10 posts
    fields = site.fetch('users/{ids}/posts', ids=[int(self.id)],
                        site="stackoverflow", sort='votes',
                        filter='withbody',  # include post bodies in the response
                        order="desc")['items']
    # join instead of quadratic += in a loop; the result is byte-identical.
    self.post_text += "".join(html2text(field['body']) + ' ' for field in fields)
    return self.post_text
def proxy_change():
    """Pick a random free HTTPS proxy and return a StackAPI client configured with it.

    Draws candidate proxies from FreeProxy (filtered by the module-level
    `proxy_country`) and retries until constructing the StackAPI client
    succeeds — any exception during construction is treated as "bad proxy"
    and a new candidate is drawn.

    Returns:
        A stackapi.StackAPI instance for stackoverflow with page_size=100
        and max_pages=1000000.
    """

    def _draw_proxy():
        # Single place to fetch + log a candidate; the original duplicated
        # these two lines before the loop and again inside the except branch.
        candidate = {'https': FreeProxy(country_id=proxy_country, timeout=0.3, rand=True).get()}
        logging.info('Change proxy server: ' + str(candidate['https']))
        return candidate

    proxy = _draw_proxy()
    while True:
        try:
            logging.info('validate proxy server...')
            site = stackapi.StackAPI('stackoverflow', proxy=proxy)
            site.page_size = 100  # limit per api call is 100
            site.max_pages = 1000000  # number of api call
            logging.info('This proxy server is workable: ' + str(proxy['https']))
            # Return directly instead of the original's boolean flag
            # (`good_proxy_found is False`) and trailing dead `pass`.
            return site
        except Exception as e:
            logging.warning(e)
            logging.info('validation failed')
            proxy = _draw_proxy()
TAG = "java" MAX_ERRORS = 3 # max. consecutive errors before stopping getting questions PAGE_RANGE = [1, 100] # range of pages to get questions from (both ranges included) PAGE_SIZE = 100 # items per page, maximum of 100 (recommended) MONGODB_SERVER = "localhost:27017" DB_NAME = "stackoverflow" COLLECTION_NAME = "java" PROXIES = ["localhost:9051", "localhost:9052", "localhost:9053", None] # MongoDB mongo = pymongo.MongoClient("mongodb://"+MONGODB_SERVER) db = mongo[DB_NAME] collection = db[COLLECTION_NAME] # StackAPI stack = stackapi.StackAPI("stackoverflow") stack.page_size = PAGE_SIZE stack.max_pages = 1 min_page = min(PAGE_RANGE) max_page = max(PAGE_RANGE) errors = 0 for page in range(min_page, max_page+1): stop = False while True: try: print(f"Getting questions from page {page}/{max_page}") proxy = random.choice(PROXIES) if proxy is not None: proxies = {"http": "http://"+proxy, "https": "https://"+proxy} else:
from chatbot import Chatbot, log import datetime import html import re import requests import time import stackapi ROOMS = [240, 123403, 120733] DEBUG = False CGCC = stackapi.StackAPI('codegolf.meta', key='0lYaLshi5yEGuEcK3ZxYHA((') HTML_search = re.compile( r'<a href="/questions/2140/sandbox-for-proposed-challenges/(\d+)\?r=SearchResults#\1"' ) TITLE1_search = re.compile(r'<h1.*?> *(.*?) *</h1>') TITLE2_search = re.compile(r'<h2.*?> *(.*?) *</h2>') EMPTY_LINK = '[{}](https://codegolf.meta.stackexchange.com/a/{})' SEARCH_URLS = [ 'https://codegolf.meta.stackexchange.com/search?q=inquestion%3A2140+lastactive%3A{}+score%3A0..+', 'https://codegolf.meta.stackexchange.com/search?q=inquestion%3A2140+created%3A{}+score%3A0..+' ] HTML_REPLACES = {'strong': '**', 'em': '*', 'code': '`'} POST_TIME = datetime.time( 1, # hour 0, # minute