def start(self, hidden=False):
    """Run the MPAA update over the movies returned by the library query.

    On the first run (module-level FIRST_RUN is truthy) the user is asked to
    pick an MPAA system from "us", "de", "nl", "au"; the choice is stored in
    the module-level LANG_MPAA and handed to util.setting / util.settingBool.

    :param hidden: when truthy, the module-level HIDE_MPAA flag is forced to
        True before processing.
    :return: the current value of the module-level HIDE_MPAA flag.
    """
    global FIRST_RUN, LANG_MPAA, HIDE_MPAA

    if hidden:
        HIDE_MPAA = True

    if FIRST_RUN:
        langs = ["us", "de", "nl", "au"]
        choice = util.dialogSelect(l("Choose_your_MPAA_system"), langs)
        if choice < 0:
            # NOTE(review): assumes util.dialogSelect reports a cancelled
            # dialog as a negative index (as Kodi's Dialog().select does) --
            # confirm against util. Without this guard langs[-1] would
            # silently select "au". Nothing is stored, so the question is
            # asked again on the next run.
            return HIDE_MPAA
        LANG_MPAA = langs[choice]
        # The two-argument forms are presumably setters that persist the
        # values -- confirm against util.
        util.setting("mpaaLang", LANG_MPAA)
        util.settingBool("firstMpaaRun", False)
        # Keep the cached module flag in step so that a second call in the
        # same session does not prompt again.
        FIRST_RUN = False

    movies = util.getMoviesWith('imdbnumber', 'mpaa')
    total = len(movies)
    if total > 0:
        self.startProcess(movies, total)
    else:
        util.dialogOk(l("Info"), l("The_video_library_is_empty_or_the_IMDb_id_doesn't_exist!"))
    return HIDE_MPAA
################ # IMDB Update # # by Jandalf # ################ import httplib, socket, json, util RATING_DIFF = 0.001 ENABLE_DIFF = util.settingBool("enableDiff") SEPARATOR = util.setting("separator").strip() class imdbMovie(object): def __init__(self, imdbID, httphandler): self.__rating = "" self.__votes = "" self.__error = False self.__imdbID = imdbID self.getData(httphandler) def getData(self, httphandler): try: httphandler.request("GET", "/?i=%s" % self.__imdbID) response = httphandler.getresponse() except (httplib.HTTPException, socket.timeout, socket.gaierror, socket.error): self.__error = True else: if response.status == 200: try: data = json.loads(response.read().decode('utf8'))
################ # MPAA Update # # by semool # ################ from util import l from imdbmpaa import imdbMpaa import util, httplib HIDE_MPAA = util.settingBool("hideMpaa") LANG_MPAA = util.setting("mpaaLang") FORM_MPAA = util.setting("mpaaPrefix") CHANGED_PREFIX = util.settingBool("enableMpaaPrefix") FIRST_RUN = util.settingBool("firstMpaaRun") class Mpaa: def start(self, hidden=False): if FIRST_RUN: global LANG_MPAA langs = ["us", "de", "nl", "au"] LANG_MPAA = langs[util.dialogSelect(l("Choose_your_MPAA_system"), langs)] util.setting("mpaaLang", LANG_MPAA) util.settingBool("firstMpaaRun", False) if hidden: global HIDE_MPAA HIDE_MPAA = True movies = util.getMoviesWith('imdbnumber', 'mpaa') total = len(movies)
def do_integerizing(
        trace_label,
        control_spec,
        control_totals,
        incidence_table,
        float_weights,
        total_hh_control_col):
    """
    Integerize the balanced float weights of a single zone.

    When the INTEGERIZE_WITH_BACKSTOPPED_CONTROLS setting is truthy and the zone
    has fewer explicit control totals than the incidence table has control
    columns, a "backstopped" pass that targets every control column is tried
    first. If that pass was not attempted, or its status is not in
    STATUS_SUCCESS, an "unbackstopped" pass restricted to the explicitly
    specified controls is run.

    Parameters
    ----------
    trace_label : str
        trace label indicating geography zone being integerized (e.g. PUMA_600)
    control_spec : pandas.Dataframe
        full control spec with columns 'target', 'seed_table', 'importance', ...
    control_totals : pandas.Series
        control totals explicitly specified for this zone
    incidence_table : pandas.Dataframe
    float_weights : pandas.Series
        balanced float weights to integerize
    total_hh_control_col : str
        name of total_hh column (preferentially constrain to match this control)

    Returns
    -------
    integerized_weights : pandas.Series
        indexed like the float_weights passed in; rows that receive no value
        from integerizer.weights['integerized_weight'] stay 0
    status : str
        as defined in integerizer.STATUS_TEXT and STATUS_SUCCESS

    Raises
    ------
    RuntimeError
        if total_hh_control_col is not among the control_spec.target columns
        selected from the incidence table
    """

    # incidence table should only have control columns
    incidence_table = incidence_table[control_spec.target]

    if total_hh_control_col not in incidence_table.columns:
        raise RuntimeError("total_hh_control column '%s' not found in incidence table"
                           % total_hh_control_col)

    # keep the unfiltered mask: its index is reused for the returned series
    zero_weight_rows = (float_weights == 0)
    if zero_weight_rows.any():
        logger.debug("omitting %s zero weight rows out of %s"
                     % (zero_weight_rows.sum(), len(incidence_table.index)))
        incidence_table = incidence_table[~zero_weight_rows]
        float_weights = float_weights[~zero_weight_rows]

    total_hh_control_value = control_totals[total_hh_control_col]

    status = None
    if setting('INTEGERIZE_WITH_BACKSTOPPED_CONTROLS') \
            and len(control_totals) < len(incidence_table.columns):

        ##########################################
        # - backstopped control_totals
        # Use balanced float weights to establish target values for all control values
        # note: this more frequently results in infeasible solver results
        ##########################################

        # if the incidence table has only one record, then the final integer weights
        # should be just an array with 1 element equal to the total number of households;
        # that case is not handled here, so it is guarded as an invariant
        assert len(incidence_table.index) > 1

        # otherwise, solve for the integer weights using the Mixed Integer Programming solver.
        integerizer = _new_integerizer(
            'backstopped_%s' % trace_label,
            control_spec,
            incidence_table,
            float_weights,
            total_hh_control_value,
            total_hh_control_col)

        status = integerizer.integerize()

        logger.debug("Integerizer status for backstopped %s: %s" % (trace_label, status))

    # if we either tried backstopped controls and failed, or never tried at all
    if status not in STATUS_SUCCESS:

        ##########################################
        # - unbackstopped partial control_totals
        # Use balanced weights to establish control totals only for explicitly specified controls
        # note: this usually results in feasible solver results, except for some single hh zones
        ##########################################

        balanced_control_cols = control_totals.index
        incidence_table = incidence_table[balanced_control_cols]
        control_spec = control_spec[control_spec.target.isin(balanced_control_cols)]

        integerizer = _new_integerizer(
            trace_label,
            control_spec,
            incidence_table,
            float_weights,
            total_hh_control_value,
            total_hh_control_col)

        status = integerizer.integerize()

        logger.debug("Integerizer status for unbackstopped %s: %s" % (trace_label, status))

    if status not in STATUS_SUCCESS:
        # NOTE(review): the message implies integerizer.weights then holds
        # smart-rounded weights -- confirm against Integerizer.
        logger.error("Integerizer failed for %s status %s. "
                     "Returning smart-rounded original weights" % (trace_label, status))
    elif status != 'OPTIMAL':
        # Logger.warn is a deprecated alias of Logger.warning
        logger.warning("Integerizer status non-optimal for %s status %s." % (trace_label, status))

    # start from 0 for every original row, then overwrite with the solver output
    integerized_weights = pd.Series(0, index=zero_weight_rows.index)
    integerized_weights.update(integerizer.weights['integerized_weight'])
    return integerized_weights, status


def _new_integerizer(
        trace_label,
        control_spec,
        incidence_table,
        float_weights,
        total_hh_control_value,
        total_hh_control_col):
    """Build an Integerizer whose relaxed targets are the rounded weighted column sums."""

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available in
    # old and new pandas alike
    relaxed_control_totals = \
        np.round(np.dot(np.asanyarray(float_weights), incidence_table.values))
    relaxed_control_totals = \
        pd.Series(relaxed_control_totals, index=incidence_table.columns.values)

    return Integerizer(
        incidence_table=incidence_table,
        control_importance_weights=control_spec.importance,
        float_weights=float_weights,
        relaxed_control_totals=relaxed_control_totals,
        total_hh_control_value=total_hh_control_value,
        total_hh_control_index=incidence_table.columns.get_loc(total_hh_control_col),
        control_is_hh_based=control_spec['seed_table'] == 'households',
        trace_label=trace_label
    )
seed = 0 device = "cuda:0" epochs = 1 n_labels = len(AUX_COLUMNS) + 1 max_len = 300 batch_size = 16 base_lr = 2e-5 gammas = [0.75, 0.5, 0.25] accumulation_steps = 8 # train_size = 1200000 valid_size = 100000 exp = "exp5" seed_torch(seed) setup_logger(out_file=LOGGER_PATH) mkdir(WORK_DIR) setting(BERT_MODEL_PATH, WORK_DIR) @contextmanager def timer(name): t0 = time.time() yield LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s') def convert_to_bool(df, col_name): df[col_name] = np.where(df[col_name] >= 0.5, True, False) def convert_dataframe_to_bool(df): bool_df = df.copy()
def use_cvxpy():
    """
    Look up the 'USE_CVXPY' setting.

    False is passed as the second argument to ``setting`` -- presumably the
    value used when the key is not configured (confirm against ``setting``).
    """
    cvxpy_enabled = setting('USE_CVXPY', False)
    return cvxpy_enabled
def use_simul_integerizer():
    """
    Look up the 'USE_SIMUL_INTEGERIZER' setting.

    True is passed as the second argument to ``setting`` -- presumably the
    value used when the key is not configured, so the simul integerizer is
    used unless told not to (confirm against ``setting``).
    """
    simul_enabled = setting('USE_SIMUL_INTEGERIZER', True)
    return simul_enabled
################ # MPAA Update # # by semool # ################ from util import l from imdbmpaa import imdbMpaa import util, httplib HIDE_MPAA = util.settingBool("hideMpaa") LANG_MPAA = util.setting("mpaaLang") FORM_MPAA = util.setting("mpaaPrefix") CHANGED_PREFIX = util.settingBool("enableMpaaPrefix") FIRST_RUN = util.settingBool("firstMpaaRun") class Mpaa: def start(self, hidden=False): if FIRST_RUN: global LANG_MPAA langs = ["us", "de", "nl", "au"] LANG_MPAA = langs[util.dialogSelect(l("Choose_your_MPAA_system"), langs)] util.setting("mpaaLang", LANG_MPAA) util.settingBool("firstMpaaRun", False) if hidden: global HIDE_MPAA HIDE_MPAA = True movies = util.getMoviesWith('imdbnumber', 'mpaa')