Example #1
def view_question(site_key, question_id, answer_id=None):
    context = {}

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # get the question referenced by this question id
    query = 'id:%s siteKey:%s' % (question_id, site_key)
    results = solr_conn().search(query)
    if len(results) == 0:
        raise HTTPError(
            code=404,
            output='No question exists with the ID %s for the site, %s.' %
            (question_id, context['site'].name))

    decode_json_fields(results)
    retrieve_users(results)
    retrieve_sites(results)

    result = results.docs[0]
    convert_comments_to_html(result)
    if settings.REWRITE_LINKS_AND_IMAGES:
        rewrite_result(result)
    sort_answers(result)
    context['result'] = result

    context['answer_id'] = answer_id

    return render_template('question.html', context)
Example #2
def rewrite_result(result):
    '''\
    Rewrites the HTML in this result (question, answers and comments) so
    links to other StackExchange sites that exist in Stackdump are rewritten,
    links elsewhere are decorated with a CSS class, and all images are replaced
    with a placeholder.
    
    The JSON must have been decoded first.
    '''
    app_url_root = settings.APP_URL_ROOT

    # get a list of all the site base URLs
    sites = list(Site.select())
    sites_by_urls = dict([(s.base_url, s) for s in sites])

    # rewrite question
    question = result.get('question')
    if question:
        question['body'] = _rewrite_html(question.get('body'), app_url_root,
                                         sites_by_urls)
        for c in question.get('comments', []):
            c['text'] = _rewrite_html(c.get('text'), app_url_root,
                                      sites_by_urls)

    # rewrite answers
    answers = result.get('answers')
    if answers:
        for a in answers:
            a['body'] = _rewrite_html(a.get('body'), app_url_root,
                                      sites_by_urls)
            for c in a.get('comments', []):
                c['text'] = _rewrite_html(c.get('text'), app_url_root,
                                          sites_by_urls)
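
The _rewrite_html helper these calls rely on is not shown in this listing. As a rough sketch only (the regex approach and behaviour here are assumptions based on the docstring, not Stackdump's actual implementation), it might look something like this:

import re

def _rewrite_html(html, app_url_root, sites_by_urls):
    # hypothetical sketch: rewrite hrefs that point at an imported
    # StackExchange site so they resolve inside Stackdump instead
    if not html:
        return html

    def rewrite_link(match):
        url = match.group(1)
        for base_url, site in sites_by_urls.items():
            if url.startswith(base_url):
                # point the link back into Stackdump
                return 'href="%s"' % url.replace(
                    base_url, '%s%s/' % (app_url_root, site.key), 1)
        # a link elsewhere: decorate it with a CSS class instead
        return 'class="external-link" %s' % match.group(0)

    return re.sub(r'href="(https?://[^"]*)"', rewrite_link, html)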
Example #3
def get_sites():
    '''\
    Retrieves a list of Site objects or if there are none, raises a
    NoSitesImportedError. This error is designed to trigger the 500 error
    handler.
    '''
    sites = list(Site.select().orderBy('name'))
    if len(sites) == 0:
        raise NoSitesImportedError()

    return sites
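
NoSitesImportedError exists purely to trip the application's 500 handler. A minimal sketch of such a handler, assuming the bottle framework (whose HTTPError and redirect appear elsewhere in this listing) and hypothetical nodata.html / 500.html templates:

from bottle import error

@error(500)
def handle_500(http_error):
    # hypothetical: bottle attaches the uncaught exception to the HTTPError
    # it wraps it in, so the handler can special-case NoSitesImportedError
    if isinstance(getattr(http_error, 'exception', None), NoSitesImportedError):
        return render_template('nodata.html', {})
    return render_template('500.html', {'error': http_error})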
Example #4
def site_index(site_key):
    context = {}
    context['sites'] = get_sites()

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    context['random_questions'] = get_random_questions(
        site_key=site_key, count=settings.NUM_OF_RANDOM_QUESTIONS)

    return render_template('index.html', context)
Example #5
def list_sites():
    # connect to the data sources
    # connect to the database
    print('Connecting to the database...')
    conn_str = settings.DATABASE_CONN_STR
    sqlhub.processConnection = connectionForURI(conn_str)
    print('Connected.\n')
    
    sites = list(Site.select()) # force the lazy method to execute
    
    if len(sites) > 0:
        print('[site key] site name')
        print('-' * 80)
        for site in sites:
            print('[%s] %s' % (site.key, site.name))
Example #6
def site_search(site_key):
    context = {}
    # the template uses this to allow searching on other sites
    context['sites'] = get_sites()

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # perform the search limited by this site
    search_context = perform_search(site_key)
    if not search_context:
        raise HTTPError(code=500, output='Invalid query attempted.')

    context.update(search_context)

    return render_template('site_results.html', context)
Example #7
def retrieve_sites(results):
    '''\
    Retrieves the site objects associated with the results.
    '''
    # get a list of all the site keys
    site_keys = set()
    for r in results:
        site_keys.add(r['siteKey'])

    # retrieve the site objects from the database
    site_objects = Site.select(IN(Site.q.key, list(site_keys)))

    # convert results into a dict with site key as the key
    sites = {}
    for s in site_objects:
        sites[s.key] = s

    # place site objects into the dict
    for r in results:
        site_key = r['siteKey']
        r['site'] = sites[site_key]
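
Note the batching: one IN query covers every site key in the result set instead of one query per result. A hypothetical usage sketch (the query string is made up; solr_conn() is the same helper used in the other examples):

# hypothetical usage: each result dict gains a 'site' key holding a Site object
results = solr_conn().search('text:postgresql')
retrieve_sites(results)
for r in results:
    print('%s belongs to %s' % (r['id'], r['site'].name))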
Example #8
def view_answer(site_key, answer_id):
    context = {}

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
    except SQLObjectNotFound:
        raise HTTPError(code=404,
                        output='No site exists with the key %s.' % site_key)

    # get the question referenced by this answer id
    query = 'answerId:%s siteKey:%s' % (answer_id, site_key)
    results = solr_conn().search(query)
    if len(results) == 0:
        raise HTTPError(
            code=404,
            output='No answer exists with the ID %s for the site, %s.' %
            (answer_id, context['site'].name))

    question_id = results.docs[0]['id']

    redirect('%s%s/%s/%s' %
             (settings.APP_URL_ROOT, site_key, question_id, answer_id))
Example #9
    print('Connected.\n')

    # connect to solr
    print('Connecting to solr...')
    solr = Solr(settings.SOLR_URL)
    # pysolr doesn't try to connect until a request is made, so we'll make a ping request
    try:
        solr._send_request('GET', '%s/admin/ping' % solr.path)
    except socket.error as e:
        print('Failed to connect to solr - error was: %s' % str(e))
        print('Aborting.')
        sys.exit(2)
    print('Connected.\n')

    site_name = None
    site = Site.select(Site.q.key == site_key).getOne(None)
    if not site:
        print('Site key "%s" does not exist in database.\n' % site_key)
        # continuing at this point means any orphaned entries in solr are
        # deleted as well.
    else:
        site_name = site.name
        sqlhub.threadConnection = sqlhub.processConnection.transaction()

        print('Deleting site "%s" from the database... ' % site.name)
        sys.stdout.flush()
        Site.delete(site.id)  # the relationship cascades, so other rows will be deleted
        print('Deleted.\n')

        sqlhub.threadConnection.commit(close=True)
Example #10
    # connect to solr
    print('Connecting to solr...')
    solr = Solr(settings.SOLR_URL, assume_clean=True)
    # pysolr doesn't try to connect until a request is made, so we'll make a ping request
    try:
        solr._send_request('GET', 'admin/ping')
    except socket.error as e:
        print('Failed to connect to solr - error was: %s' % str(e))
        print('Aborting.')
        sys.exit(2)
    print('Connected.\n')

    # ensure required tables exist
    print("Creating tables if they don't exist...")
    Site.createTable(ifNotExists=True)
    Badge.createTable(ifNotExists=True)
    User.createTable(ifNotExists=True)
    print('Created.\n')

    # SITE INFO
    # only look if the site info wasn't specified at the command line; also
    # only if readme.txt exists (readme files aren't in dumps after Aug 2012)
    readme_path = get_file_path(xml_root, 'readme.txt')
    if not (site_name and dump_date) and readme_path:
        # get the site name from the first line of readme.txt. This could be fragile.
        with open(readme_path, 'r') as f:
            site_readme_desc = f.readline().strip()

        # assume if there's a colon in the name, the name part is before, and the date
        # part is after.
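
The snippet is cut off at this point. A hypothetical sketch of the colon-split parsing the comment describes, assuming a first line such as 'Super User: 2011-09-01':

# hypothetical sketch of the colon-split described in the comment above
if ':' in site_readme_desc:
    readme_name, readme_date = site_readme_desc.split(':', 1)
    site_name = site_name or readme_name.strip()
    dump_date = dump_date or readme_date.strip()
else:
    site_name = site_name or site_readme_desc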
Example #11
def retrieve_users(results, question_only=False, ignore_comments=False):
    '''\
    Retrieves the user objects associated with the question objects.
    '''
    # get a list of all the user IDs
    user_ids_by_site = {}
    for r in results:
        site_key = r['siteKey']
        if site_key not in user_ids_by_site.keys():
            user_ids_by_site[site_key] = set()

        # the search result object itself
        for k in r.keys():
            if k.lower().endswith('userid'):
                user_ids_by_site[site_key].add(r[k])

        # the question object
        question = r['question']
        for k in question.keys():
            if k.lower().endswith('userid'):
                user_ids_by_site[site_key].add(question[k])

        comments = question.get('comments')
        if not ignore_comments and comments:
            for c in comments:
                for ck in c.keys():
                    if ck.lower().endswith('userid'):
                        user_ids_by_site[site_key].add(c[ck])

        # the answers
        answers = r.get('answers')
        if not question_only and answers:
            for a in answers:
                for k in a.keys():
                    if k.lower().endswith('userid'):
                        user_ids_by_site[site_key].add(a[k])

                comments = a.get('comments')
                if not ignore_comments and comments:
                    for c in comments:
                        for ck in c.keys():
                            if ck.lower().endswith('userid'):
                                user_ids_by_site[site_key].add(c[ck])

    # retrieve the user objects from the database by site
    users_by_site = {}
    for site_key in user_ids_by_site.keys():
        site = Site.select(Site.q.key == site_key).getOne()
        user_objects = User.select(
            AND(User.q.site == site,
                IN(User.q.sourceId, list(user_ids_by_site[site_key]))))

        # convert results into a dict with user id as the key
        users = {}
        for u in user_objects:
            users[u.sourceId] = u

        users_by_site[site_key] = users

    # place user objects into the dict
    for r in results:
        site_key = r['siteKey']

        # the search result object itself
        for k in r.keys():
            if k.lower().endswith('userid'):
                # use the same field name, minus the 'Id' on the end.
                r[k[:-2]] = users_by_site[site_key].get(r[k])

        # the question object
        question = r['question']
        for k in question.keys():
            if k.lower().endswith('userid'):
                # use the same field name, minus the 'Id' on the end.
                question[k[:-2]] = users_by_site[site_key].get(question[k])

        comments = question.get('comments')
        if not ignore_comments and comments:
            for c in comments:
                for ck in c.keys():
                    if ck.lower().endswith('userid'):
                        # use the same field name, minus the 'Id' on the end.
                        c[ck[:-2]] = users_by_site[site_key].get(c[ck])

        # the answers
        answers = r.get('answers')
        if not question_only and answers:
            for a in answers:
                for k in a.keys():
                    if k.lower().endswith('userid'):
                        # use the same field name, minus the 'Id' on the end.
                        a[k[:-2]] = users_by_site[site_key].get(a[k])

                comments = a.get('comments')
                if not ignore_comments and comments:
                    for c in comments:
                        for ck in c.keys():
                            if ck.lower().endswith('userid'):
                                # use the same field name, minus the 'Id' on the end.
                                c[ck[:-2]] = users_by_site[site_key].get(c[ck])
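
The k[:-2] slice implements the convention the comments describe: the resolved User object is stored under the field name with its trailing 'Id' removed. A hypothetical illustration (assumes a connected database that has this site and user imported):

# hypothetical illustration of the field-renaming convention
r = {'siteKey': 'superuser', 'question': {'ownerUserId': 42}}
retrieve_users([r])
# afterwards the question holds the resolved object alongside the raw id:
# r['question']['ownerUserId'] == 42
# r['question']['ownerUser']  -> <User with sourceId 42> (or None if unknown)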