def main():
    """Load the pickled comment classifier and run it over the database.

    Reads the classifier object from the file ``comment.classifier`` in the
    current working directory, opens a database connection through
    ``util.makeDbConnection()`` and hands both to ``classifyComments``.
    The file handle and the database connection are released even when one
    of the steps raises.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file, so "comment.classifier" must only ever come from a trusted source.
    with open("comment.classifier", "rb") as f:
        classifier = pickle.load(f)

    db = util.makeDbConnection()
    try:
        classifyComments(db, classifier)
    finally:
        # Always give the connection back, including on failure.
        db.close()
def root():
    """Render ``experts.html`` for the search term in the ``q`` URL parameter.

    With an empty or missing ``q`` the template is rendered with empty post
    and user lists and no database connection is opened.  Otherwise the topic
    model is queried for matching posts, users are scored against those posts
    and each user result is converted with ``starScore`` (5 stars).

    Returns:
        Whatever ``flask.render_template`` returns for ``experts.html`` with
        the template variables ``query``, ``users`` and ``posts``.
    """
    query = flask.request.args.get("q", "")
    logging.debug("query=%s", query)

    postResults = []
    userResults = []
    if query:
        logging.debug("connecting to the database...")
        db = util.makeDbConnection(database)
        try:
            logging.debug("querying the topic model...")
            postResults = topicModel.queryResults(db, query, resultCutoff)
            if postResults is None:
                # The None check must come before len(); calling len(None)
                # raises TypeError.  Hand the template an empty list instead.
                postResults = []
            else:
                logging.debug("%d results returned...", len(postResults))
                logging.debug("scoring users...")
                userResults = scoring.scoreUsers(
                    db, query, postResults, topicModel,
                    cutoffPercentile=percentileCutoff,
                    resultCutoff=resultCutoff)
                logging.debug("star-scoring users...")
                userResults = [
                    userResult.starScore(cutoffPercentile=percentileCutoff,
                                         nStars=5)
                    for userResult in userResults
                ]
        finally:
            # Release the connection even if querying or scoring raised.
            db.close()

    return flask.render_template("experts.html", query=query,
                                 users=userResults, posts=postResults)
def testIterate():
    """Interactively step through a ``StackOverflowCorpus``.

    Connects to the ``"stackoverflow"`` database, iterates the corpus built
    on a fresh gensim ``Dictionary`` and, for every document, prints the
    document together with the dictionary and then blocks on ``raw_input()``
    until the user presses Enter.  The connection is closed when iteration
    finishes or fails.
    """
    # Connect *before* the try block: if the connection attempt itself fails
    # there is nothing to close, and a ``finally: db.close()`` would raise
    # NameError on the unbound name and hide the real error.
    db = util.makeDbConnection("stackoverflow")
    try:
        dictionary = gensim.corpora.dictionary.Dictionary()
        for doc in StackOverflowCorpus(db, dictionary):
            # A single parenthesised argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("doc= %s dictionary= %s" % (doc, dictionary))
            raw_input()
    finally:
        db.close()
def testIterate():
    """Print every document of a ``StackOverflowCorpus``, pausing after each.

    Opens a connection to the ``"stackoverflow"`` database, creates an empty
    gensim ``Dictionary`` and walks the corpus.  Each document is printed
    along with the current dictionary, after which ``raw_input()`` waits for
    the user before the next document is fetched.  The connection is always
    closed on the way out.
    """
    # The connection is acquired outside the try so that a failed connect
    # propagates as-is instead of being masked by a NameError from
    # ``db.close()`` on a name that was never bound.
    db = util.makeDbConnection("stackoverflow")
    try:
        dictionary = gensim.corpora.dictionary.Dictionary()
        corpus = StackOverflowCorpus(db, dictionary)
        for doc in corpus:
            # One pre-formatted string keeps the output identical whether
            # print is a statement (Python 2) or a function (Python 3).
            print("doc= %s dictionary= %s" % (doc, dictionary))
            raw_input()
    finally:
        db.close()
def root():
    """Handle the search page: look up posts and experts for ``?q=...``.

    When ``q`` is empty or absent, ``experts.html`` is rendered with empty
    result lists and the database is not touched.  When a query is present,
    posts are fetched from the topic model, users are scored with
    ``scoring.scoreUsers`` and every user result is passed through
    ``starScore`` with ``nStars=5``.

    Returns:
        The result of ``flask.render_template("experts.html", ...)`` with the
        template variables ``query``, ``users`` and ``posts``.
    """
    query = flask.request.args.get("q", "")
    logging.debug("query=%s", query)

    postResults = []
    userResults = []
    if not query:
        return flask.render_template("experts.html", query=query,
                                     users=userResults, posts=postResults)

    logging.debug("connecting to the database...")
    db = util.makeDbConnection(database)
    try:
        logging.debug("querying the topic model...")
        postResults = topicModel.queryResults(db, query, resultCutoff)
        if postResults is not None:
            # Only measure the result once it is known not to be None;
            # len(None) would raise TypeError.
            logging.debug("%d results returned...", len(postResults))
            logging.debug("scoring users...")
            scoredUsers = scoring.scoreUsers(
                db, query, postResults, topicModel,
                cutoffPercentile=percentileCutoff,
                resultCutoff=resultCutoff)
            logging.debug("star-scoring users...")
            userResults = [
                scored.starScore(cutoffPercentile=percentileCutoff, nStars=5)
                for scored in scoredUsers
            ]
        else:
            # Give the template an empty list rather than None.
            postResults = []
    finally:
        # The connection is closed whether or not scoring succeeded.
        db.close()

    return flask.render_template("experts.html", query=query,
                                 users=userResults, posts=postResults)
def makeStackOverflowCorpus(fileName, topic=None, usePostList=False,
                            useTags=None):
    """Build a ``StackOverflowCorpus`` and write it to disk.

    Three files are written, all prefixed with ``fileName``: the corpus in
    Matrix Market form (``.mm``), the gensim dictionary (``.dict``) and the
    corpus-to-post mapping (``.c2p``).  The dictionary and the mapping are
    saved even when serialising the corpus fails, after which the original
    error propagates.

    Args:
        fileName: Path prefix for the three output files.
        topic: Passed through unchanged to ``StackOverflowCorpus``.
        usePostList: When true, restrict the corpus to the posts returned by
            ``util.tagPosts`` for ``useTags``.
        useTags: Tags used to select posts when ``usePostList`` is true.  An
            empty or missing value falls back to ``util.topTags(db, 200)``.

    Returns:
        The number of posts in the post list, or ``None`` when
        ``usePostList`` is false and no post list was built.
    """
    db = util.makeDbConnection(Config.myDb)
    try:
        postList = None
        if usePostList:
            print("Using post list")
            if not useTags:
                useTags = util.topTags(db, 200)
            postList = util.tagPosts(db, useTags)
            # Pre-formatted so the text is the same under Python 2 and 3.
            print("Need to import %d posts" % len(postList))

        dictionary = gensim.corpora.dictionary.Dictionary()
        soCorpus = StackOverflowCorpus(db, dictionary, topic, postList)
        try:
            gensim.corpora.MmCorpus.serialize(fileName + ".mm", soCorpus)
        finally:
            # Keep whatever was collected even if serialisation broke off.
            dictionary.save(fileName + ".dict")
            soCorpus.saveCorpusToPost(fileName + ".c2p")
    finally:
        # Close the connection on every path, including early failures in
        # tag lookup or corpus construction.
        db.close()

    # Without a post list there is nothing to count; len(None) would raise.
    if postList is None:
        return None
    return len(postList)
def makeStackOverflowCorpus(fileName, topic=None, usePostList=False,
                            useTags=None):
    """Serialise a ``StackOverflowCorpus`` plus its side files.

    Writes ``fileName + ".mm"`` (Matrix Market corpus),
    ``fileName + ".dict"`` (gensim dictionary) and ``fileName + ".c2p"``
    (corpus-to-post mapping).  The ``.dict`` and ``.c2p`` files are written
    even if corpus serialisation raises; the exception is then re-raised.

    Args:
        fileName: Common path prefix of the output files.
        topic: Forwarded as-is to the ``StackOverflowCorpus`` constructor.
        usePostList: If true, a post list is obtained from ``util.tagPosts``
            and passed to the corpus; otherwise the corpus receives ``None``.
        useTags: Tags for ``util.tagPosts``.  When empty or ``None`` the
            result of ``util.topTags(db, 200)`` is used instead.

    Returns:
        ``len(postList)`` when a post list was built, otherwise ``None``.
    """
    db = util.makeDbConnection(Config.myDb)
    try:
        if usePostList:
            print("Using post list")
            tags = useTags if useTags else util.topTags(db, 200)
            postList = util.tagPosts(db, tags)
            # Single formatted argument: same output on Python 2 and 3.
            print("Need to import %d posts" % len(postList))
        else:
            postList = None

        dictionary = gensim.corpora.dictionary.Dictionary()
        soCorpus = StackOverflowCorpus(db, dictionary, topic, postList)
        try:
            gensim.corpora.MmCorpus.serialize(fileName + ".mm", soCorpus)
        finally:
            # Persist partial results regardless of how serialisation ended.
            dictionary.save(fileName + ".dict")
            soCorpus.saveCorpusToPost(fileName + ".c2p")
    finally:
        # Runs on success and on any failure above, so the connection
        # is never left open.
        db.close()

    # len(None) raises TypeError, so report "no post list" as None.
    return len(postList) if postList is not None else None
def main():
    """Create the precalculated scoring tables.

    Opens a database connection through ``util.makeDbConnection()``, passes
    it to ``createPrescoringTables`` and closes the connection afterwards,
    whether or not table creation succeeded.
    """
    db = util.makeDbConnection()
    try:
        createPrescoringTables(db)
    finally:
        # Close the connection even when table creation raises.
        db.close()