def view_question(site_key, question_id, answer_id=None):
    '''\
    Renders the question page for the given question ID on the given site.

    If answer_id is given, it is passed to the template so the page can
    highlight/scroll to that answer.
    '''
    context = {}

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # look up the question document in solr by its ID, scoped to this site
    results = solr_conn().search('id:%s siteKey:%s' % (question_id, site_key))
    if len(results) == 0:
        raise HTTPError(
            code=404,
            output='No question exists with the ID %s for the site, %s.' % (question_id, context['site'].name))

    # enrich the raw solr documents before rendering
    decode_json_fields(results)
    retrieve_users(results)
    retrieve_sites(results)

    result = results.docs[0]
    convert_comments_to_html(result)
    if settings.REWRITE_LINKS_AND_IMAGES:
        rewrite_result(result)
    sort_answers(result)

    context['result'] = result
    context['answer_id'] = answer_id

    return render_template('question.html', context)
def rewrite_result(result):
    '''\
    Rewrites the HTML in this result (question, answers and comments) so
    links to other StackExchange sites that exist in Stackdump are rewritten,
    links elsewhere are decorated with a CSS class, and all images are
    replaced with a placeholder.

    The JSON must have been decoded first.
    '''
    app_url_root = settings.APP_URL_ROOT

    # map each known site's base URL to its Site object
    sites_by_urls = dict((s.base_url, s) for s in Site.select())

    def rewrite_post(post):
        # rewrite a post's body and the text of each of its comments in place
        post['body'] = _rewrite_html(post.get('body'), app_url_root, sites_by_urls)
        for comment in post.get('comments', []):
            comment['text'] = _rewrite_html(comment.get('text'), app_url_root, sites_by_urls)

    # rewrite question
    question = result.get('question')
    if question:
        rewrite_post(question)

    # rewrite answers
    answers = result.get('answers')
    if answers:
        for answer in answers:
            rewrite_post(answer)
def get_sites():
    '''\
    Retrieves a list of Site objects or if there are none, raises a
    NoSitesImportedError. This error is designed to trigger the 500 error
    handler.
    '''
    # materialise the lazy select, ordered by site name
    sites = list(Site.select().orderBy('name'))
    if not sites:
        raise NoSitesImportedError()

    return sites
def site_index(site_key):
    '''\
    Renders the landing page for a single site, including a sample of random
    questions from it.
    '''
    context = {'sites': get_sites()}

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    context['random_questions'] = get_random_questions(
        site_key=site_key,
        count=settings.NUM_OF_RANDOM_QUESTIONS)

    return render_template('index.html', context)
def list_sites():
    '''\
    Connects to the database and prints the key and name of every imported
    site.
    '''
    # connect to the data sources
    # connect to the database
    print('Connecting to the database...')
    sqlhub.processConnection = connectionForURI(settings.DATABASE_CONN_STR)
    print('Connected.\n')

    # force the lazy select to execute
    sites = list(Site.select())
    if sites:
        print('[site key] site name')
        print('-' * 80)
        for s in sites:
            print('[%s] %s' % (s.key, s.name))
def site_search(site_key):
    '''\
    Performs a search limited to a single site and renders the results page.
    '''
    context = {}
    # the template uses this to allow searching on other sites
    context['sites'] = get_sites()

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # perform the search limited by this site
    search_context = perform_search(site_key)
    if not search_context:
        raise HTTPError(code=500, output='Invalid query attempted.')
    context.update(search_context)

    return render_template('site_results.html', context)
def retrieve_sites(results):
    '''\
    Retrieves the site objects associated with the results.
    '''
    # collect the distinct site keys across all results
    site_keys = set(r['siteKey'] for r in results)

    # fetch the matching Site rows from the database and index them by key
    sites = dict((s.key, s)
                 for s in Site.select(IN(Site.q.key, list(site_keys))))

    # attach the Site object to each result
    for r in results:
        r['site'] = sites[r['siteKey']]
def view_answer(site_key, answer_id):
    '''\
    Looks up the question that owns the given answer and redirects to the
    question page, anchored on that answer.
    '''
    try:
        site = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # find the question document that contains this answer id
    results = solr_conn().search('answerId:%s siteKey:%s' % (answer_id, site_key))
    if len(results) == 0:
        raise HTTPError(
            code=404,
            output='No answer exists with the ID %s for the site, %s.' % (answer_id, site.name))

    question_id = results.docs[0]['id']
    redirect('%s%s/%s/%s' % (settings.APP_URL_ROOT, site_key, question_id, answer_id))
print('Connected.\n')

# connect to solr
print('Connecting to solr...')
solr = Solr(settings.SOLR_URL)
# pysolr doesn't try to connect until a request is made, so we'll make a ping request
try:
    solr._send_request('GET', '%s/admin/ping' % solr.path)
# 'as' form is valid on Python 2.6+ and Python 3, unlike 'except X, e'
except socket.error as e:
    print('Failed to connect to solr - error was: %s' % str(e))
    print('Aborting.')
    sys.exit(2)
print('Connected.\n')

site_name = None
site = Site.select(Site.q.key == site_key).getOne(None)
if not site:
    # print() function form for consistency with the rest of the file
    print('Site key "%s" does not exist in database.\n' % site_key)
    # continuing at this point means any orphaned entries in solr are
    # deleted as well.
else:
    site_name = site.name
    sqlhub.threadConnection = sqlhub.processConnection.transaction()

    print('Deleting site "%s" from the database... ' % site.name)
    sys.stdout.flush()
    Site.delete(site.id)  # the relationship cascades, so other rows will be deleted
    print('Deleted.\n')

    sqlhub.threadConnection.commit(close=True)
# connect to solr
print('Connecting to solr...')
solr = Solr(settings.SOLR_URL, assume_clean=True)
# pysolr doesn't try to connect until a request is made, so we'll make a ping request
try:
    solr._send_request('GET', 'admin/ping')
# 'as' form is valid on Python 2.6+ and Python 3, unlike 'except X, e'
except socket.error as e:
    print('Failed to connect to solr - error was: %s' % str(e))
    print('Aborting.')
    sys.exit(2)
print('Connected.\n')

# ensure required tables exist
print("Creating tables if they don't exist...")
Site.createTable(ifNotExists=True)
Badge.createTable(ifNotExists=True)
User.createTable(ifNotExists=True)
print('Created.\n')

# SITE INFO
# only look if they were not specified at the command line; also only if
# readme.txt exists (they don't in dumps after Aug 2012)
readme_path = get_file_path(xml_root, 'readme.txt')
if not (site_name and dump_date) and readme_path:
    # get the site name from the first line of readme.txt. This could be fragile.
    with open(readme_path, 'r') as f:
        site_readme_desc = f.readline().strip()

    # assume if there's a colon in the name, the name part is before, and the date
    # part is after.
print('Connected.\n')

# connect to solr
print('Connecting to solr...')
solr = Solr(settings.SOLR_URL)
# pysolr doesn't try to connect until a request is made, so we'll make a ping request
try:
    solr._send_request('GET', '%s/admin/ping' % solr.path)
# 'as' form is valid on Python 2.6+ and Python 3, unlike 'except X, e'
except socket.error as e:
    print('Failed to connect to solr - error was: %s' % str(e))
    print('Aborting.')
    sys.exit(2)
print('Connected.\n')

site_name = None
site = Site.select(Site.q.key == site_key).getOne(None)
if not site:
    # print() function form for consistency with the rest of the file
    print('Site key "%s" does not exist in database.\n' % site_key)
    # continuing at this point means any orphaned entries in solr are
    # deleted as well.
else:
    site_name = site.name
    sqlhub.threadConnection = sqlhub.processConnection.transaction()

    print('Deleting site "%s" from the database... ' % site.name)
    sys.stdout.flush()
    Site.delete(site.id)  # the relationship cascades, so other rows will be deleted
    print('Deleted.\n')
def retrieve_users(results, question_only=False, ignore_comments=False): '''\ Retrieves the user objects associated with the question objects. ''' # get a list of all the user IDs user_ids_by_site = {} for r in results: site_key = r['siteKey'] if site_key not in user_ids_by_site.keys(): user_ids_by_site[site_key] = set() # the search result object itself for k in r.keys(): if k.lower().endswith('userid'): user_ids_by_site[site_key].add(r[k]) # the question object question = r['question'] for k in question.keys(): if k.lower().endswith('userid'): user_ids_by_site[site_key].add(question[k]) comments = question.get('comments') if not ignore_comments and comments: for c in comments: for ck in c.keys(): if ck.lower().endswith('userid'): user_ids_by_site[site_key].add(c[ck]) # the answers answers = r.get('answers') if not question_only and answers: for a in answers: for k in a.keys(): if k.lower().endswith('userid'): user_ids_by_site[site_key].add(a[k]) comments = a.get('comments') if not ignore_comments and comments: for c in comments: for ck in c.keys(): if ck.lower().endswith('userid'): user_ids_by_site[site_key].add(c[ck]) # retrieve the user objects from the database by site users_by_site = {} for site_key in user_ids_by_site.keys(): site = Site.select(Site.q.key == site_key).getOne() user_objects = User.select( AND(User.q.site == site, IN(User.q.sourceId, list(user_ids_by_site[site_key])))) # convert results into a dict with user id as the key users = {} for u in user_objects: users[u.sourceId] = u users_by_site[site_key] = users # place user objects into the dict for r in results: site_key = r['siteKey'] # the search result object itself for k in r.keys(): if k.lower().endswith('userid'): # use the same field name, minus the 'Id' on the end. r[k[:-2]] = users_by_site[site_key].get(r[k]) # the question object question = r['question'] for k in question.keys(): if k.lower().endswith('userid'): # use the same field name, minus the 'Id' on the end. 
question[k[:-2]] = users_by_site[site_key].get(question[k]) comments = question.get('comments') if not ignore_comments and comments: for c in comments: for ck in c.keys(): if ck.lower().endswith('userid'): # use the same field name, minus the 'Id' on the end. c[ck[:-2]] = users_by_site[site_key].get(c[ck]) # the answers answers = r.get('answers') if not question_only and answers: for a in answers: for k in a.keys(): if k.lower().endswith('userid'): # use the same field name, minus the 'Id' on the end. a[k[:-2]] = users_by_site[site_key].get(a[k]) comments = a.get('comments') if not ignore_comments and comments: for c in comments: for ck in c.keys(): if ck.lower().endswith('userid'): # use the same field name, minus the 'Id' on the end. c[ck[:-2]] = users_by_site[site_key].get(c[ck])