def print_average_edit_count_per_day(cls):
    """Print each user's average number of edits per day since registration.

    Runs ``cls.EDIT_COUNT_WITH_REGISTRATION_DATE_QUERY`` and reads every row
    as ``(userid, bot, edit_count, registration)``, where ``registration`` is
    a ``%Y-%m-%dT%H:%M:%SZ`` timestamp string. The average is truncated to an
    integer. The rows are sorted by average and then by ``bot`` before being
    handed to ``print_df``.
    """
    # Take the reference time once so every row is measured against the same
    # instant instead of a slightly later one per row.
    current_date = datetime.now()

    data = {
        'userid': [],
        'bot': [],
        'edit_count_per_day': [],
        'registration': [],
    }

    for item in db.execute(cls.EDIT_COUNT_WITH_REGISTRATION_DATE_QUERY):
        registration_date = datetime.strptime(item[3], '%Y-%m-%dT%H:%M:%SZ')

        # An account registered less than a day ago has 0 elapsed days, which
        # previously raised ZeroDivisionError. Count at least one full day.
        days_since_registration = max((current_date - registration_date).days, 1)

        data['userid'].append(item[0])
        data['bot'].append(item[1])
        data['edit_count_per_day'].append(int(item[2] / days_since_registration))
        data['registration'].append(str(registration_date.date()))

    df = pd.DataFrame(data)
    df = df.sort_values(by=['edit_count_per_day', 'bot'])

    print_df(df, ['userid', 'bot', 'edit_count_per_day', 'registration'], ['12', '35', '20', '20'])
def print_bots_with_bot_flag_and_in_bot_group(cls):
    """Print the distinct bots returned by ``cls.BOTS_WITH_BOT_FLAG_AND_IN_BOT_GROUP_QUERY``.

    The first column of every row is collected into a set and passed to
    ``print_names_and_count`` together with a description.
    """
    unique_names = {
        row[0]
        for row in db.execute(cls.BOTS_WITH_BOT_FLAG_AND_IN_BOT_GROUP_QUERY)
    }
    print_names_and_count(
        unique_names,
        'unique bots with a bot flag and which belong to the group bot'
    )
def print_bots_without_request(cls):
    """Print the distinct bots returned by ``cls.BOTS_WITHOUT_REQUEST_QUERY``.

    The first column of every row is collected into a set and passed to
    ``print_names_and_count`` together with a description.
    """
    unique_names = {
        row[0]
        for row in db.execute(cls.BOTS_WITHOUT_REQUEST_QUERY)
    }
    print_names_and_count(unique_names, 'bots without a request for permission')
def print_unique_bots(cls):
    """Print the distinct bots returned by ``cls.UNIQUE_BOTS_QUERY``.

    The first column of every row is collected into a set and passed to
    ``print_names_and_count`` together with a description.
    """
    unique_names = {row[0] for row in db.execute(cls.UNIQUE_BOTS_QUERY)}
    print_names_and_count(unique_names, 'unique bots')
def print_bots_in_bot_group(cls):
    """Print the distinct bots returned by ``cls.BOTS_WITHOUT_BOT_FLAG_IN_BOT_GROUP_QUERY``.

    The first column of every row is collected into a set and passed to
    ``print_names_and_count`` together with a description.
    """
    unique_names = {
        row[0]
        for row in db.execute(cls.BOTS_WITHOUT_BOT_FLAG_IN_BOT_GROUP_QUERY)
    }
    print_names_and_count(
        unique_names,
        'unique bots which belong to the group bot and do not have a bot flag'
    )
def print_request_for_permission_without_closed_at(cls):
    """Print the results of ``cls.REQUEST_WITHOUT_CLOSED_AT_QUERY``.

    The first column of every row is kept in query order (duplicates are not
    removed) and passed to ``print_names_and_count`` with a description and
    ``"\\n"`` as the third argument.
    """
    requests = []
    for row in db.execute(cls.REQUEST_WITHOUT_CLOSED_AT_QUERY):
        requests.append(row[0])

    print_names_and_count(requests, 'requests for permissions without closed_at', "\n")
def print_request_for_permission_without_editor_count(cls):
    """Print the results of ``cls.REQUEST_WITHOUT_EDITOR_COUNT_QUERY``.

    The first column of every row is kept in query order (duplicates are not
    removed) and passed to ``print_names_and_count`` with a description and
    ``"\\n"`` as the third argument.
    """
    requests = []
    for row in db.execute(cls.REQUEST_WITHOUT_EDITOR_COUNT_QUERY):
        requests.append(row[0])

    print_names_and_count(requests, 'requests for permissions without editor_count', "\n")
def print_bots_without_request_without_groups(cls):
    """Print the distinct bots without a request whose groups column is ``None``.

    Iterates ``cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY``; a row contributes
    its first column only when its second column is ``None``.
    """
    bots_without_groups = set()
    for row in db.execute(cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY):
        if row[1] is None:
            bots_without_groups.add(row[0])

    print_names_and_count(bots_without_groups, 'bots without a request and without groups')
def plot_distribution_over_time(cls, sql):
    """Plot how many rows of ``sql`` fall on each point of a time series.

    The bounds of the series come from running ``sql`` with ``cls.SQL_MIN``
    and ``cls.SQL_MAX`` appended. Every timestamp (first column) has the
    part matched by ``cls.TIME_RE`` removed before it is used as a key
    (presumably this strips the time of day; confirm against ``TIME_RE``).
    The series is created by ``cls.init_time_series`` and drawn by
    ``cls.plot``.
    """
    def _last_stripped_value(query):
        # Mirrors the plain loop: keep the value of the last row, or None
        # when the query yields no rows.
        stripped = None
        for row in db.execute(query):
            stripped = re.sub(cls.TIME_RE, '', row[0])
        return stripped

    earliest_time = _last_stripped_value(sql + cls.SQL_MIN)
    latest_time = _last_stripped_value(sql + cls.SQL_MAX)

    time_series = cls.init_time_series(earliest_time, latest_time)

    for row in db.execute(sql):
        bucket = re.sub(cls.TIME_RE, '', row[0])
        time_series[bucket] += 1

    cls.plot(time_series, ['date', 'count'])
def plot_distribution(cls, sql):
    """Plot the count for every editor_count value between the minimum and maximum.

    The bounds are obtained from ``cls.get_min_value`` and
    ``cls.get_max_value`` on the ordered query. Every integer in that range
    starts at 0, and is then overwritten by the ``(value, count)`` rows that
    ``sql`` returns. The result is drawn by ``cls.plot``.
    """
    ordered_sql = sql + ''' ORDER BY editor_count'''
    lower_bound = cls.get_min_value(ordered_sql)
    upper_bound = cls.get_max_value(ordered_sql)

    # Pre-fill so that values with no rows still appear with a count of 0.
    distribution = {value: 0 for value in range(lower_bound, upper_bound + 1)}

    for row in db.execute(sql):
        distribution[row[0]] = row[1]

    cls.plot(distribution, ['editor_count', 'count'])
def print_groups_differences(cls):
    """Print the groups exclusive to bots with, and to bots without, a request.

    The second column of each row of ``cls.GROUPS_OF_BOTS_WITH_REQUEST_QUERY``
    and ``cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY`` is read as a
    comma-separated list of groups; rows where it is ``None`` are skipped.
    Both set differences are printed in sorted order so that the output is
    the same on every run.
    """
    def _collect_groups(query):
        # Union of all comma-separated groups found in the query result.
        return {
            group
            for item in db.execute(query)
            if item[1] is not None
            for group in item[1].split(',')
        }

    with_request_groups = _collect_groups(cls.GROUPS_OF_BOTS_WITH_REQUEST_QUERY)
    without_request_groups = _collect_groups(cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY)

    # Set iteration order for strings varies between interpreter runs, so the
    # differences are sorted before joining.
    print(
        "#################### All groups that bots with a request for permission belong to but all other bots do not: ####################\n",
        ', '.join(sorted(with_request_groups - without_request_groups)),
        "\n")

    print(
        "#################### All groups that bots without a request for permission belong to but all other bots do not: ####################\n",
        ', '.join(sorted(without_request_groups - with_request_groups)),
        "\n")
def print_bots_with_request_without_groups(cls):
    """Print bots with a request and no groups, split by the red-link column.

    Iterates ``cls.GROUPS_OF_BOTS_WITH_REQUEST_QUERY``. Rows whose second
    column is not ``None`` are ignored. The remaining rows go into the
    red-link list when the third column equals 1, otherwise into the other
    list. Both lists are printed with ``print_names_and_count``.
    """
    red_link_bots = []
    other_bots = []

    for row in db.execute(cls.GROUPS_OF_BOTS_WITH_REQUEST_QUERY):
        if row[1] is not None:
            continue
        target = red_link_bots if row[2] == 1 else other_bots
        target.append(row[0])

    print_names_and_count(red_link_bots, 'bots with a request, without groups and a red link')
    print_names_and_count(other_bots, 'bots with a request, without groups and without a red link')
def print_editor_count(cls, mode='none'):
    """Print the url and editor_count returned by the query selected by ``mode``.

    ``mode`` is the key into ``cls.EDITOR_COUNT_QUERIES``. The rows are
    sorted by ``editor_count`` and then ``url`` before being handed to
    ``print_df``.
    """
    urls = []
    editor_counts = []
    for row in db.execute(cls.EDITOR_COUNT_QUERIES[mode]):
        urls.append(row[0])
        editor_counts.append(row[1])

    frame = pd.DataFrame({'url': urls, 'editor_count': editor_counts})
    frame = frame.sort_values(by=['editor_count', 'url'])

    print_df(frame, ['url', 'editor_count'], ['90', '15'])
def print_right_differences_for_bot_flag_and_bot_group(cls):
    """Print the rights exclusive to bot-flag bots, and to bot-group bots.

    The second column of each row of ``cls.RIGHTS_OF_BOTS_WITH_BOT_FLAG_QUERY``
    and ``cls.RIGHTS_OF_BOTS_IN_GROUP_BOT`` is read as a comma-separated list
    of rights; rows where it is ``None`` are skipped. Both set differences
    are printed in sorted order so that the output is the same on every run.
    """
    def _collect_rights(query):
        # Union of all comma-separated rights found in the query result.
        return {
            right
            for item in db.execute(query)
            if item[1] is not None
            for right in item[1].split(',')
        }

    with_bot_flag_rights = _collect_rights(cls.RIGHTS_OF_BOTS_WITH_BOT_FLAG_QUERY)
    in_bot_group_rights = _collect_rights(cls.RIGHTS_OF_BOTS_IN_GROUP_BOT)

    # Set iteration order for strings varies between interpreter runs, so the
    # differences are sorted before joining.
    print(
        "#################### All rights that bots with a bot flag have but bots in bot group do not have: ####################\n",
        ', '.join(sorted(with_bot_flag_rights - in_bot_group_rights)),
        "\n")

    print(
        "#################### All rights that bots which belong to the bot group have but bots with a bot flag do not have: ####################\n",
        ', '.join(sorted(in_bot_group_rights - with_bot_flag_rights)),
        "\n")
def print_rights_of_bot_in_bot_group(cls):
    """Print how often each right occurs in ``cls.RIGHTS_OF_BOTS_IN_GROUP_BOT``.

    The second column of every row is read as a comma-separated list of
    rights; rows where it is ``None`` are skipped. Rights are counted, sorted
    by descending count and then ascending name, and handed to ``print_df``.
    """
    rights = [
        right
        for row in db.execute(cls.RIGHTS_OF_BOTS_IN_GROUP_BOT)
        if row[1] is not None
        for right in row[1].split(',')
    ]

    frame = pd.DataFrame({'right': rights})
    counts = frame.groupby(['right']).size().reset_index(name='count')
    counts = counts.sort_values(by=['count', 'right'], ascending=[False, True])

    print_df(counts, ['right', 'count'], ['30', '30'])
def print_right_differences_for_request(cls):
    """Print the rights exclusive to bots with, and to bots without, a request.

    The second column of each row of ``cls.RIGHTS_OF_BOTS_WITH_REQUEST_QUERY``
    and ``cls.RIGHTS_OF_BOTS_WITHOUT_REQUEST_QUERY`` is read as a
    comma-separated list of rights; rows where it is ``None`` are skipped.
    Both set differences are printed in sorted order so that the output is
    the same on every run.
    """
    def _collect_rights(query):
        # Union of all comma-separated rights found in the query result.
        return {
            right
            for item in db.execute(query)
            if item[1] is not None
            for right in item[1].split(',')
        }

    with_request_rights = _collect_rights(cls.RIGHTS_OF_BOTS_WITH_REQUEST_QUERY)
    without_request_rights = _collect_rights(cls.RIGHTS_OF_BOTS_WITHOUT_REQUEST_QUERY)

    # Set iteration order for strings varies between interpreter runs, so the
    # differences are sorted before joining.
    print(
        "#################### All rights that bots with a request for permission have but all other bots do not have: ####################\n",
        ', '.join(sorted(with_request_rights - without_request_rights)),
        "\n")

    print(
        "#################### All rights that bots without a request for permission have but all other bots do not have: ####################\n",
        ', '.join(sorted(without_request_rights - with_request_rights)),
        "\n")
def plot_general_statistics_about_requests(cls):
    """Plot the request and bot statistics as grouped horizontal bars.

    ``cls.GENERAL_STATISTICS_ABOUT_REQUESTS_QUERIES`` maps a target
    (``'request'`` or ``'bot'``) to a mapping of statistic name to query.
    For each query the first column of its last row becomes the value of
    that statistic. One ``Bar`` trace is built per target and the figure is
    shown with ``iplot``.
    """
    def _horizontal_bar(values, label, fill_color, outline_color):
        # One horizontal bar per statistic; names on the y axis, values on x.
        return Bar(
            y=list(values.keys()),
            x=list(values.values()),
            name=label,
            orientation='h',
            marker=dict(
                color=fill_color,
                line=dict(color=outline_color, width=1.5),
            ),
            opacity=0.8,
        )

    statistics = {'request': {}, 'bot': {}}
    for target, queries in cls.GENERAL_STATISTICS_ABOUT_REQUESTS_QUERIES.items():
        for statistic, query in queries.items():
            for row in db.execute(query):
                statistics[target][statistic] = row[0]

    traces = [
        _horizontal_bar(
            statistics['request'],
            'Requests for Permissions',
            'rgb(235,173,104)',
            'rgb(185,125,54)',
        ),
        _horizontal_bar(
            statistics['bot'],
            'Bots',
            'rgb(204,204,204)',
            'rgb(150,150,150)',
        ),
    ]

    layout = Layout(xaxis=dict(tickangle=-45), barmode='group')
    figure = Figure(data=traces, layout=layout)
    iplot(figure, filename='angled-text-bar')
def plot_bots_groups_distribution(cls):
    """Print each group combination with its count and percentage share.

    Every row of ``cls.GROUPED_GROUPS_OF_BOTS_QUERY`` carries a
    comma-separated group list in its first column and a count in its
    second. The groups are sorted and re-joined with ``', '``. A ``%`` column
    holds each count as a percentage of the total, rounded to two decimals.
    """
    group_labels = []
    counts = []
    for row in db.execute(cls.GROUPED_GROUPS_OF_BOTS_QUERY):
        group_labels.append(', '.join(sorted(row[0].split(','))))
        counts.append(row[1])

    frame = pd.DataFrame({'groups': group_labels, 'count': counts})
    total = frame['count'].sum()
    frame['%'] = round(frame['count'] / total * 100, 2)

    print_df(frame, ['groups', 'count', '%'], ['60', '15', '15'])
def print_groups_of_bots_without_request(cls):
    """Print how often each group occurs among bots without a request.

    The second column of every row of
    ``cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY`` is read as a comma-separated
    list of groups; rows where it is ``None`` are skipped. Groups are counted,
    sorted by descending count and then ascending name, and handed to
    ``print_df``.
    """
    groups = []
    for row in db.execute(cls.GROUPS_OF_BOTS_WITHOUT_REQUEST_QUERY):
        if row[1] is not None:
            groups.extend(row[1].split(','))

    frame = pd.DataFrame({'group': groups})
    counts = frame.groupby(['group']).size().reset_index(name='count')
    counts = counts.sort_values(by=['count', 'group'], ascending=[False, True])

    print_df(counts, ['group', 'count'], ['30', '30'])
def print_edit_count(cls):
    """Print each user's edit count together with the registration date.

    Runs ``cls.EDIT_COUNT_WITH_REGISTRATION_DATE_QUERY`` and reads every row
    as ``(userid, bot, edit_count, registration)``, where ``registration`` is
    a ``%Y-%m-%dT%H:%M:%SZ`` timestamp string that is reduced to its date.
    The rows are sorted by ``edit_count`` and then ``bot`` before being
    handed to ``print_df``.
    """
    user_ids = []
    bots = []
    edit_counts = []
    registrations = []

    for row in db.execute(cls.EDIT_COUNT_WITH_REGISTRATION_DATE_QUERY):
        registered_at = datetime.strptime(row[3], '%Y-%m-%dT%H:%M:%SZ')
        user_ids.append(row[0])
        bots.append(row[1])
        edit_counts.append(row[2])
        registrations.append(str(registered_at.date()))

    frame = pd.DataFrame({
        'userid': user_ids,
        'bot': bots,
        'edit_count': edit_counts,
        'registration': registrations,
    })
    frame = frame.sort_values(by=['edit_count', 'bot'])

    print_df(frame, ['userid', 'bot', 'edit_count', 'registration'], ['12', '35', '12', '20'])
def plot_bots_groups_without_implicit_groups_distribution(cls):
    """Print the distribution of explicit group combinations.

    Every row of ``cls.GROUPED_GROUPS_OF_BOTS_WITHOUT_IMPLICIT_GROUPS_QUERY``
    carries all groups in its first column and the implicit groups in its
    second, both comma-separated. The implicit groups are removed, the rest
    is sorted and joined with ``', '``, and the resulting combinations are
    counted. The output is sorted by descending count and then ascending
    name, with a ``%`` column rounded to two decimals.
    """
    combinations = []
    for row in db.execute(cls.GROUPED_GROUPS_OF_BOTS_WITHOUT_IMPLICIT_GROUPS_QUERY):
        explicit_groups = set(row[0].split(',')) - set(row[1].split(','))
        combinations.append(', '.join(sorted(explicit_groups)))

    frame = pd.DataFrame({'groups': combinations})
    counts = frame.groupby(['groups']).size().reset_index(name='count')
    counts = counts.sort_values(by=['count', 'groups'], ascending=[False, True])

    total = counts['count'].sum()
    counts['%'] = round(counts['count'] / total * 100, 2)

    print_df(counts, ['groups', 'count', '%'], ['40', '15', '15'])
def print_bots_with_request_without_rights(cls):
    """Print bots with a request and no rights, split by the red-link column.

    Iterates ``cls.RIGHTS_OF_BOTS_WITH_REQUEST_QUERY``. Rows whose second
    column is not ``None`` (bots that have rights) are not reported. The
    remaining rows go into the red-link list when the third column equals 1,
    otherwise into the other list. Both lists are printed with
    ``print_names_and_count``.
    """
    red_link_bots = []
    other_bots = []

    for row in db.execute(cls.RIGHTS_OF_BOTS_WITH_REQUEST_QUERY):
        if row[1] is not None:
            continue
        target = red_link_bots if row[2] == 1 else other_bots
        target.append(row[0])

    print_names_and_count(red_link_bots, 'bots with a request, without rights and a red link')
    print_names_and_count(other_bots, 'bots with a request, without rights and without a red link')
def get_max_value(cls, sql):
    """Return the first column of the first row of ``sql + cls.SQL_MAX``.

    Raises ``IndexError`` when the query returns no rows.
    """
    first_columns = []
    for row in db.execute(sql + cls.SQL_MAX):
        first_columns.append(row[0])
    return first_columns[0]
def retrieve_bots(sql):
    """Run ``sql`` and return ``(names, count)``.

    ``names`` is the list of first-column values in query order and
    ``count`` is the length of that list.
    """
    names = []
    for row in db.execute(sql):
        names.append(row[0])
    return names, len(names)