def mysql_resultset(wp_pr, start, end, opts):
    '''Executes the query built by `mysql_config.construct_cu_query` for
    `wp_pr` between `start` and `end` and returns the executed cursor.

    The cursor is requested with `server_side=True`, so the caller iterates
    over it to pull the rows. Alternatively, the `dump_data_iterator()`
    method dumps the data onto disk before aggregation.

    The cursor is returned open; closing it is left to the caller.
    '''
    sql = mysql_config.construct_cu_query(wp_pr=wp_pr, start=start, end=end)

    # The full SQL text is logged at debug level for troubleshooting.
    log_args = (wp_pr, start, end, sql)
    logger.debug("SQL query for %s for start=%s, end=%s:\n\t%s" % log_args)

    cursor = mysql_config.get_analytics_cursor(wp_pr, opts, server_side=True)
    cursor.execute(sql)
    return cursor
def mysql_resultset(wp_pr, start, end, opts):
    '''
    Returns an iterable MySql resultset for `wp_pr` covering `start` to `end`.

    The query comes from `mysql_config.construct_cu_query` and is executed on
    a cursor obtained with `server_side=True`; the executed cursor itself is
    what gets returned, so rows are obtained by iterating over it.
    Alternatively, the `dump_data_iterator()` method dumps the data onto disk
    before aggregation.
    '''
    statement = mysql_config.construct_cu_query(
        wp_pr=wp_pr, start=start, end=end)

    debug_msg = "SQL query for %s for start=%s, end=%s:\n\t%s" % (
        wp_pr, start, end, statement)
    logger.debug(debug_msg)

    # The cursor is intentionally left open: the caller consumes it.
    result_cursor = mysql_config.get_analytics_cursor(
        wp_pr, opts, server_side=True)
    result_cursor.execute(statement)
    return result_cursor
def retrieve_bot_list(wp_pr, opts):
    '''Returns a set of all known bots for `wp_pr`.

    Bots are not labeled in a cohesive manner for Wikipedia. We use the union
    of the bots used for the
    [Wikipedia statistics](stats.wikimedia.org/), stored in
    `./data/erikZ.bots` and the `user_group.ug_group='bot'` flag in the MySql
    database.

    `wp_pr` and `opts` are passed through to `mysql_config` to build the bot
    query and to obtain the (client side) cursor.

    The bot file is expected to hold one integer per line; a line that cannot
    be parsed raises `ValueError`.
    '''
    bot_fn = os.path.join(os.path.split(__file__)[0], 'data', 'erikZ.bots')
    # Use a context manager so the file handle is released deterministically.
    with open(bot_fn, 'r') as bot_file:
        erikZ_bots = set(long(b) for b in bot_file)

    query = mysql_config.construct_bot_query(wp_pr)
    cur = mysql_config.get_analytics_cursor(wp_pr, opts, server_side=False)
    try:
        cur.execute(query)
        # Read every row BEFORE closing the connection: DB-API cursors are
        # not guaranteed to be usable once their connection is closed.
        pr_bots = set(c[0] for c in cur)
    finally:
        # Close the connection even if the query or the fetch failed.
        cur.connection.close()

    logger.debug("%s: There are %s additional bots (from %s) not in ErikZ bot file"%(wp_pr,len(pr_bots-erikZ_bots),len(pr_bots)))
    return erikZ_bots.union(pr_bots)
def retrieve_bot_list(wp_pr, opts):
    '''
    Returns a set of all known bots for `wp_pr`.

    Bots are not labeled in a cohesive manner for Wikipedia. We use the union
    of the bots used for the
    [Wikipedia statistics](stats.wikimedia.org/), stored in
    `./data/erikZ.bots` and the `user_group.ug_group='bot'` flag in the MySql
    database.

    `wp_pr` and `opts` are passed through to `mysql_config` to build the bot
    query and to obtain the (client side) cursor.

    The bot file is expected to hold one integer per line; a line that cannot
    be parsed raises `ValueError`.
    '''
    bot_fn = os.path.join(os.path.split(__file__)[0], 'data', 'erikZ.bots')
    # Use a context manager so the file handle is released deterministically.
    with open(bot_fn, 'r') as bot_file:
        erikZ_bots = set(long(b) for b in bot_file)

    query = mysql_config.construct_bot_query(wp_pr)
    cur = mysql_config.get_analytics_cursor(wp_pr, opts, server_side=False)
    try:
        cur.execute(query)
        # Read every row BEFORE closing the cursor: per the DB-API a closed
        # cursor is unusable, so fetching afterwards only works by accident.
        pr_bots = set(c[0] for c in cur)
    finally:
        # Release the cursor even if the query or the fetch failed.
        cur.close()

    logger.debug(
        "%s: There are %s additional bots (from %s) not in ErikZ bot file" %
        (wp_pr, len(pr_bots - erikZ_bots), len(pr_bots)))
    return erikZ_bots.union(pr_bots)