def __init__(self, config_file):
    """Load the experiment YAML config and initialize API sessions,
    database connections, and empty per-stage result containers.

    Args:
        config_file: filename of a YAML config inside the package's
            ``config`` directory.
    """
    config_path = os.path.join(
        Path(__file__).parent.parent, 'config', config_file)
    # context manager so the config file handle is closed promptly
    # (the original left both open() handles to the garbage collector)
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    self.config = config
    self.langs = config['langs']
    self.experiment_start_date = config['experiment_start_date']
    self.observation_back_days = config['observation_back_days']
    # observation window begins `observation_back_days` before the experiment start
    self.observation_start_date = self.experiment_start_date - datetime.timedelta(
        self.observation_back_days)
    # one MediaWiki API session per configured language
    self.mwapi_sessions = {
        lang: self.make_mwapi_session(lang)
        for lang in self.langs
    }
    self.wmf_con = make_wmf_con()
    self.wmf_db = {}
    self.wmf_db_hits = 0
    # per-language working stores populated by later processing stages
    self.thankers = {}
    self.surveys = {}
    self.merged = {}
    self.merged_no_survey = {}
    self.analysis = {}
    self.superthankers = {}
    self.db_engine = init_engine()
    self.db_session = init_session()
    # mapping from Qualtrics export columns to internal field names.
    # NOTE: "interal" is the actual on-disk filename; do not "correct" it.
    qualtrics_map_path = os.path.join(
        Path(__file__).parent.parent, 'config',
        "qualtrics_to_interal_field_map.yaml")
    with open(qualtrics_map_path, 'r') as f:
        self.qualtrics_map = yaml.safe_load(f)
def __init__(self, thank_batch_size=1, lang=None):
    """Configure a thank-sender: batch size, target language, and OAuth
    credentials taken from the environment.

    Args:
        thank_batch_size: fallback batch size when
            CS_WIKIPEDIA_OAUTH_BATCH_SIZE is unset.
        lang: language code of the wiki to send thanks on.
    """
    # FIX: os.getenv returns a *string* when the variable is set, while the
    # fallback default is an int — coerce so the attribute is always an int
    # (consistent with max_send_errors below).
    self.thank_batch_size = int(
        os.getenv('CS_WIKIPEDIA_OAUTH_BATCH_SIZE', thank_batch_size))
    logging.info(f"Thanking batch size set to : {self.thank_batch_size}")
    self.db_session = init_session()
    self.lang = lang
    logging.info(f"Thanking language set to. {self.lang}")
    # OAuth consumer credentials are required; KeyError here is intentional
    # so a misconfigured deployment fails fast.
    self.consumer_token = mwoauth.ConsumerToken(
        os.environ['CS_OAUTH_CONSUMER_KEY'],
        os.environ['CS_OAUTH_CONSUMER_SECRET'])
    self.max_send_errors = int(
        os.environ['CS_OAUTH_THANKS_MAX_SEND_ERRORS'])
def add_num_quality_user(user_id, lang, namespace_fn_name,
                         num_quality_revisions_replacement=None):
    """Compute a user's number of quality revisions and persist it on the
    user's ``candidates`` row.

    Args:
        user_id: wiki user id.
        lang: language code identifying the wiki.
        namespace_fn_name: name of the namespace-filter function to use.
        num_quality_revisions_replacement: optional stand-in for the default
            ``num_quality_revisions`` function (useful in tests).

    Raises:
        ValueError: if no candidates row exists for (lang, user_id).
    """
    db_session = init_session()
    wmf_con = make_wmf_con()
    namespace_fn = get_namespace_fn(namespace_fn_name)
    quality_revisions = (num_quality_revisions
                         if num_quality_revisions_replacement is None
                         else num_quality_revisions_replacement)
    num_quality = quality_revisions(user_id=user_id, lang=lang,
                                    wmf_con=wmf_con,
                                    namespace_fn=namespace_fn)
    user_rec = db_session.query(candidates).filter(
        candidates.lang == lang).filter(
        candidates.user_id == user_id).first()
    if user_rec is None:
        # FIX: previously this crashed with an opaque AttributeError on None
        raise ValueError(
            f"no candidates record found for user_id={user_id} lang={lang}")
    user_rec.user_editcount_quality = num_quality
    db_session.add(user_rec)
    db_session.commit()
def __init__(self, config_file, fn):
    """Load the experiment YAML config and record the callable used to
    produce experiment things.

    Args:
        config_file: filename of a YAML config inside the package's
            ``config`` directory.
        fn: callable applied later when building experiment things.
    """
    config_path = os.path.join(
        Path(__file__).parent.parent, 'config', config_file)
    # context manager so the file handle is closed promptly
    # (the original left the open() handle to the garbage collector)
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    self.fn = fn
    self.config = config
    self.db_session = init_session()
    # snapshot the count before any inserts so progress can be measured
    # ("inital" spelling kept — it is part of the public attribute name)
    self.inital_num_experiment_things = self.num_experiment_things()
    self.df = None
    self.ets_to_add = []
def __init__(self, config=None):
    """Set up report generation: config, database access, output paths
    and addressing.

    Args:
        config: optional pre-loaded config dict. When None, the config is
            read from the file named by the CS_EXTRA_CONFIG_FILE
            environment variable.
    """
    # FIX: honor an explicitly supplied config — previously the `config`
    # parameter was accepted but silently ignored.
    self.config = (config if config is not None else
                   read_config_file(os.environ['CS_EXTRA_CONFIG_FILE'],
                                    __file__))
    self.db_engine = init_engine()
    self.db_session = init_session()
    # experiment id is only resolvable when the config names an experiment
    if 'name' in self.config.keys():
        self.experiment_id = _get_experiment_id(self.db_session,
                                                self.config['name'],
                                                return_id=True)
    self.csv_dir = os.path.join(self.config["dirs"]['project'],
                                self.config["dirs"]['reports'])
    # date stamp used in report filenames
    self.date = datetime.datetime.today().strftime('%Y%m%d')
    self.queries = {}
    self.to_addrs = self.config['reports']['to_addrs']
    self.from_addr = self.config['reports']['from_addr']
    self.subject_stat = None
def __init__(self, config_file, get_active_users_replacement=None,
             db_session_replacement=None):
    """Load the onboarder config and set up sampling state: data
    connections, observation window, population containers, and work queues.

    Args:
        config_file: filename of a YAML config inside the package's
            ``config`` directory.
        get_active_users_replacement: optional stand-in for the default
            ``get_active_users`` function (useful in tests).
        db_session_replacement: optional pre-built DB session (useful in
            tests); when falsy a fresh session is created.
    """
    config_path = os.path.join(
        Path(__file__).parent.parent, 'config', config_file)
    # context manager so the file handle is closed promptly
    # (the original left the open() handle to the garbage collector)
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    self.config = config
    self.langs = config['langs']
    self.min_edit_count = config['min_edit_count']
    self.wmf_con = make_wmf_con()
    # `or` keeps init_session() lazy: only called when no replacement given
    self.db_session = db_session_replacement or init_session()
    self.experiment_start_date = config['experiment_start_date']
    # activity window: from `observation_back_days` before the experiment
    # start up to now
    self.onboarding_earliest_active_date = self.experiment_start_date - timedelta(
        days=config['observation_back_days'])
    self.onboarding_latest_active_date = datetime.utcnow()
    self.populations = defaultdict(dict)
    self.namespace_fn = get_namespace_fn(config['namespace_fn'])
    self.get_active_users_replacement = get_active_users_replacement
    self.get_active_users = (get_active_users_replacement
                             or get_active_users)
    # optional cap on how many onboarders to examine
    self.max_onboarders_to_check = self.config.get(
        'max_onboarders_to_check')
    # user ids already enrolled in the thanker experiment, per language;
    # they are excluded from thankee sampling
    self.users_in_thanker_experiment = {
        "ar": [],
        "de": [],
        "fa": [],
        "pl": [],
        "en": []
    }
    self.q = Queue(name='onboarder_thankee', connection=Redis())
    self.failed_q = Queue(name='failed', connection=Redis())
def __init__(self, lang=None, enable_create_actions=True,
             enable_execute_actions=True):
    """Configure a survey action runner: batch size, language, OAuth
    credentials, intervention metadata, and run-mode flags.

    Args:
        lang: fallback language code when CS_WIKIPEDIA_LANG is unset.
        enable_create_actions: whether the create-actions phase runs.
        enable_execute_actions: whether the execute-actions phase runs.
    """
    self.config = read_config_file(os.environ['CS_EXTRA_CONFIG_FILE'],
                                   __file__)
    self.batch_size = int(os.getenv('CS_WIKIPEDIA_ACTION_BATCH_SIZE', 2))
    logging.info(f"Survey batch size set to : {self.batch_size}")
    self.db_session = init_session()
    self.lang = os.getenv('CS_WIKIPEDIA_LANG', lang)
    logging.info(f"Survey sending language set to. {self.lang}")
    self.consumer_token = mwoauth.ConsumerToken(
        os.environ['CS_OAUTH_CONSUMER_KEY'],
        os.environ['CS_OAUTH_CONSUMER_SECRET'])
    self.max_send_errors = int(
        os.getenv('CS_OAUTH_THANKS_MAX_SEND_ERRORS', 5))
    self.intervention_type = self.config['settings']['intervention_type']
    self.intervention_name = self.config['settings']['intervention_name']
    # a slot for a connection or session kept open between phases
    self.api_con = None
    # CS_DRY_RUN must be a numeric string ("0"/"1"); int(False) == 0 covers
    # the unset case. NOTE(review): a value like "False" would crash int().
    self.dry_run = bool(int(os.getenv('CS_DRY_RUN', False)))
    self.enable_create_actions = enable_create_actions
    self.enable_execute_actions = enable_execute_actions
def sample_population(self, lang):
    """
    - for incomplete groups:
        - sample active users
        - remove users with less than n edits
        - remove editors in thanker experiment
        - assign experience level (once only)
        - update/insert candidates
        - iterative representative sampling
        - add thanks history
        - add emailable status
        - add labour hours
    """
    # Get the active users
    if "custom_users" in self.config["langs"][lang].keys():
        # not sampling active users but cheating with custom_users list
        active_users = get_specific_users(
            lang,
            self.config['langs'][lang]["custom_users"],
            wmf_con=self.wmf_con)
    else:
        active_users = self.get_active_users(
            lang,
            start_date=self.onboarding_earliest_active_date,
            end_date=self.onboarding_latest_active_date,
            min_rev_id=self.langs[lang]['min_rev_id'],
            wmf_con=self.wmf_con)

    # Flag and drop official bots
    active_users_bots = self.add_bots(active_users, lang)
    logging.info(
        f"length of active users before bot check {len(active_users_bots)}"
    )
    active_users_no_bots = active_users_bots[
        active_users_bots['is_official_bot'] == False]
    bots = active_users_bots[active_users_bots['is_official_bot'] == True]
    logging.info(
        f"length of active users after bot check {len(active_users_no_bots)}"
    )
    logging.info(f"active bots are {bots[['user_name','user_editcount']]}")

    # Subset to: - minimum edits
    # FIX: the boolean mask must be built from the already-filtered frame;
    # masking active_users_no_bots with a Series from active_users raises an
    # unalignable-mask IndexingError in modern pandas.
    active_users_min_edits = active_users_no_bots[
        active_users_no_bots['user_editcount'] >= self.min_edit_count]  # need to have at least this many edits

    # Subset to non-thanker experiment
    active_users_min_edits_nonthanker = active_users_min_edits[
        active_users_min_edits["user_id"].apply(
            lambda uid: uid not in self.users_in_thanker_experiment[lang])]

    # Add experience levels
    active_users_min_edits_nonthanker_exp = add_experience_bin(
        active_users_min_edits_nonthanker, self.experiment_start_date)
    logging.info(
        f"Group {lang} has {len(active_users_min_edits_nonthanker_exp)} active users with 4 edits in history."
    )

    # Now work on groups
    groups = self.config['langs'][lang]['groups']
    for group_name, inclusion_criteria in groups.items():
        df = self.get_quality_data_for_group(
            super_group=active_users_min_edits_nonthanker_exp,
            lang=lang,
            group_name=group_name,
            inclusion_criteria=inclusion_criteria)
        if self.get_active_users_replacement:
            # test mode: stop after quality data, skip persistence below
            continue

        ## Nota Bene. This is where things get a bit wonky.
        # 1. at first I thought that I would store the user state in a
        # candidates table, and in fact that is useful for the sake of being
        # able to multiprocess the quality-edits revision. however it is a
        # pain to update columns in the grow-only right pandas-style, which
        # the rest of the independent variables. in addition since we aren't
        # onboarding in a rolling-state, but once every active-window-days,
        # we don't really need to store the state to compare it. at this
        # point in collecting data we switch to the pandas style and keep
        # the user state as a dict of data frames "population".
        # So todo: reconcile the two ways to store state.

        # add previous thanks received last 90/84 days.
        # refreshing con here, sometimes gets stale after waiting
        self.wmf_con = make_wmf_con()
        self.db_session = init_session()

        logging.info('adding labour hours')
        if "labor_hours_84_days_pre_sample" not in df.columns:
            df = add_labour_hours(
                df,
                lang,
                start_date=self.onboarding_earliest_active_date,
                end_date=self.onboarding_latest_active_date,
                wmf_con=self.wmf_con,
                col_label="labor_hours_84_days_pre_sample")
            self.df_to_db_col(lang, df, 'labor_hours_84_days_pre_sample')

        logging.info(f'adding email df')
        if 'has_email' not in df.columns:
            df = add_has_email(df, lang, self.wmf_con)
            self.df_to_db_col(lang, df, 'has_email')

        logging.info(f'adding num prev_thanks_pre_sample')
        if "num_prev_thanks_pre_sample" not in df.columns:
            # newcomers are observed from the window start; everyone else
            # from the introduction of the thank feature
            start_date = (self.onboarding_earliest_active_date
                          if group_name == 'newcomer'
                          else THANK_FEATURE_INTRODUCITON)
            df = add_thanks_receiving(
                df,
                lang,
                start_date=start_date,
                end_date=self.onboarding_latest_active_date,
                wmf_con=self.wmf_con,
                col_label='num_prev_thanks_pre_sample')
            self.df_to_db_col(lang, df, 'num_prev_thanks_pre_sample')

        logging.info(
            f"Group {lang}-{group_name} Saving {len(df)} as included.")
        df['user_included'] = True
        self.df_to_db_col(lang, df, 'user_included')
def db_session():
    """Create and return a fresh database session."""
    session = init_session()
    return session
def db_session():
    """Yield a database session and guarantee it is closed afterwards.

    Yields:
        a session created by ``init_session()``.
    """
    session = init_session()
    try:
        yield session
    finally:
        # FIX: close even when an exception is thrown into the generator
        # (e.g. a failing consumer/test); previously close() was skipped.
        session.close()