Exemple #1
0
def status(election_date):
    """
    Report the replay state for ``election_date``.

    Returns a JSON string with the current hopper position, playback
    speed, error/rate-limit flags, the level ('local' or 'national'),
    and the path of the file served at the current position.  The
    caller must pass ``?national=true`` or ``?national=false``;
    omitting the parameter yields a JSON error payload.
    """
    national_arg = request.args.get('national', None)
    if not national_arg:
        # The level is mandatory: refuse to guess between local/national.
        return json.dumps({
            'error': True,
            'message': 'must specify national=true or national=false'
        })
    LEVEL = 'national' if request.args['national'].lower() == 'true' else 'local'

    election_key = 'AP_DEJAVU_%s' % election_date

    # Order recordings by the token that follows 'recording-' in the name.
    pattern = '%s%s/%s/*' % (DATA_DIR, election_date, LEVEL)
    hopper = sorted(glob.glob(pattern),
                    key=lambda path: path.split('recording-')[1])

    # Replay state lives in r_conn; fall back to defaults when unset.
    position = int(r_conn.get(election_key + '_POSITION') or 0)
    playback = int(r_conn.get(election_key + '_PLAYBACK') or 1)
    errormode = utils.to_bool(r_conn.get(election_key + '_ERRORMODE') or 'False')
    ratelimited = utils.to_bool(r_conn.get(election_key + '_RATELIMITED') or 'False')

    payload = {
        'playback': playback,
        'position': position,
        'errormode': errormode,
        'ratelimited': ratelimited,
        'file': hopper[position - 1],
        'level': LEVEL,
    }
    return json.dumps(payload)
Exemple #2
0
def execute(h2o, params, config):
    """
    Tokenize a text column of an H2O frame and store the result.

    Splits ``target_column`` either through an Elasticsearch analyzer
    (when ``analyzer`` is non-empty) or with the regex given in
    ``params['regex']``.  Optionally lower-cases tokens, drops tokens
    shorter than ``min_word_len``, and filters out STOP_WORDS; NA rows
    are always kept by the filters.
    """
    frame_id = config.get('frame_id')
    frame = h2o.get_frame(frame_id)

    target_column = params.get("target_column")
    analyzer = params.get("analyzer")
    if len(analyzer) > 0:
        # Delegate tokenization to an Elasticsearch analyzer endpoint.
        url = params.get("url")
        tokens = frame[target_column].tokenize(
            f'tokenize:elasticsearch:{url}?analyzer={analyzer}_analyzer')
    else:
        tokens = frame[target_column].tokenize(params.get('regex'))

    if to_bool(params.get('lower_case')):
        tokens = tokens.tolower()

    min_word_len = int(params.get('min_word_len'))
    if min_word_len > 0:
        # Keep NA rows as well as tokens of at least min_word_len chars.
        tokens = tokens[(tokens.nchar() >= min_word_len) | (tokens.isna()), :]

    if to_bool(params.get('use_stop_words')):
        tokens = tokens[(tokens.isna()) | (~tokens.isin(STOP_WORDS)), :]

    dest_frame_id = append_frame_id(frame_id, params.get('suffix'))
    h2o.assign(tokens, dest_frame_id)

    return {'frame_id': dest_frame_id}
Exemple #3
0
def index():
    """
    List every election replay directory along with its current state.

    Will match directories named like the following:
    2015-09-10
    09-10-2015

    Builds one entry per (election, level) pair — 'local' and
    'national' — with state read from r_conn, then renders the index
    template.
    """
    context = utils.build_context()
    context['elections'] = []

    date_pattern = re.compile(r'(\d{2,4}[-]\d{2,4}[-]\d{2,4})')
    names = (path.split('/')[-1] for path in glob.glob('%s/*' % DATA_DIR))
    elections = sorted(name for name in names if date_pattern.match(name))

    for election in elections:
        for level in ('local', 'national'):
            election_key = 'AP_DEJAVU_%s' % election
            entry = {
                'election_date': election,
                'national': level == 'national',
                'level': level,
                'title': "%s [%s]" % (election, level),
                'position': int(r_conn.get(election_key + '_POSITION') or 0),
                'total_positions': len(
                    glob.glob('%s%s/%s/*' % (DATA_DIR, election, level))),
                'playback': int(r_conn.get(election_key + '_PLAYBACK') or 1),
                'errormode': utils.to_bool(
                    r_conn.get(election_key + '_ERRORMODE') or 'False'),
                'ratelimited': utils.to_bool(
                    r_conn.get(election_key + '_RATELIMITED') or 'False'),
            }
            context['elections'].append(entry)
    return render_template('index.html', **context)
Exemple #4
0
    def on_participate(self, request):
        """Handle a /participate request: assign the client to an alternative.

        Required query args: ``experiment``, ``alternatives``, ``client_id``.
        Responds with JSON naming the chosen alternative.
        """
        # List of alternative (bucket) names for the experiment.
        alts = request.args.getlist('alternatives')
        # alt_fractions = request.args.getlist('alt_fractions')
        experiment_name = request.args.get('experiment')  # experiment name
        force = request.args.get('force')
        # record_force: to_bool returns True for values like y/true/yes,
        # False otherwise.
        record_force = to_bool(request.args.get('record_force', 'false'))
        client_id = request.args.get('client_id')  # participating client id
        traffic_fraction = request.args.get('traffic_fraction')  # share of traffic to enroll

        if traffic_fraction is not None:
            traffic_fraction = float(
                traffic_fraction)  # convert to float when present
        prefetch = to_bool(request.args.get(
            'prefetch',
            'false'))  # prefetch flag, parsed the same way as record_force
        if client_id is None or experiment_name is None or alts is None:  # all three args are required; otherwise 400
            return json_error({'message': 'missing arguments'}, request, 400)

        dt = None
        if request.args.get(
                "datetime"):  # optional; parsed into a datetime.datetime, default None
            dt = dateutil.parser.parse(request.args.get("datetime"))
        try:
            if should_exclude_visitor(
                    request):  # crawler user-agent or excluded IP: do not enroll
                exp = Experiment.find(experiment_name, redis=self.redis)
                if exp.winner is not None:  # experiment already has a winner
                    alt = exp.winner
                else:
                    alt = exp.control  # no winner yet: use the control (first) alternative
            else:
                alt = participate(
                    experiment_name,
                    alts,
                    client_id,
                    force=force,
                    record_force=record_force,
                    traffic_fraction=traffic_fraction,
                    # alt_fraction=alt_fractions,
                    prefetch=prefetch,
                    datetime=dt,
                    redis=self.redis)
        except ValueError as e:
            return json_error({'message': str(e)}, request, 400)

        resp = {
            'alternative': {
                'name': alt.name
            },
            'experiment': {
                'name': alt.experiment.name,
            },
            'client_id': client_id,
            'status': 'ok'
        }

        return json_success(resp, request)
Exemple #5
0
def execute(h2o, params, config):
    """
    Run a model's predictions over a frame and store the result.

    Optionally splits off the first ``column_header`` rows before
    predicting, strips the ``reconstr_`` prefix from prediction column
    names, and — when ``topn_output`` is truthy — keeps only the top-N
    percent via ``get_topN``, re-attaching any split-off header rows.

    Returns a dict with the destination frame id.
    """
    frame_id = config.get('frame_id')
    model_id = config.get('model_id')

    df = h2o.get_frame(frame_id)

    # Bug fix: df_head must exist even when no header rows are split off;
    # otherwise the `df_head is not None` check below raises NameError
    # whenever column_header is empty and topn_output is truthy.
    df_head = None
    column_header = params.get('column_header')
    if len(column_header) > 0:
        df_head = df[:int(column_header)]
        df = df[int(column_header):]

    pred_model = h2o.get_model(model_id)

    df_pred = pred_model.predict(df)
    # Drop the 'reconstr_' prefix from every prediction column name.
    df_pred.columns = [x[len('reconstr_'):] for x in df_pred.columns]

    dest_frame_id = append_frame_id(frame_id, params.get('suffix'))

    if to_bool(params.get('topn_output')):
        df_topn = get_topN(df_pred, int(params.get('topn_percent')))
        if df_head is not None:
            # Re-attach the header rows split off above.
            df_topn = df_head.cbind(df_topn)
        h2o.assign(df_topn, dest_frame_id)
        h2o.remove(str(df_pred.frame_id))
    else:
        h2o.assign(df_pred, dest_frame_id)

    return {'frame_id': dest_frame_id}
Exemple #6
0
def execute(h2o, params, config):
    """
    Train an H2O Word2Vec model on a frame and optionally transform it.

    All hyper-parameters arrive as strings in ``params`` and are coerced
    to the numeric types the estimator expects.  When ``is_transform``
    is truthy, the frame is transformed with the chosen
    ``aggregate_method`` and stored under a new id.

    Returns a dict with the (possibly new) frame id and the model id.
    """
    frame_id = config.get('frame_id')
    df = h2o.get_frame(frame_id)

    from h2o.estimators import H2OWord2vecEstimator
    # Coerce the string-valued hyper-parameters up front.
    hyper = {
        'epochs': int(params.get('epochs')),
        'init_learning_rate': float(params.get('init_learning_rate')),
        'max_runtime_secs': float(params.get('max_runtime_secs')),
        'min_word_freq': int(params.get('min_word_freq')),
        'sent_sample_rate': float(params.get('sent_sample_rate')),
        'vec_size': int(params.get('vec_size')),
        'window_size': int(params.get('window_size')),
    }
    w2v_model = H2OWord2vecEstimator(**hyper)

    w2v_model.train(training_frame=df)

    save_model(params, w2v_model.model_id)

    is_transform = params.get("is_transform")
    if is_transform is not None and to_bool(is_transform):
        df_vecs = w2v_model.transform(
            df, aggregate_method=params.get('aggregate_method'))
        dest_frame_id = append_frame_id(frame_id,
                                        params.get('transform_suffix'))
        h2o.assign(df_vecs, dest_frame_id)
    else:
        dest_frame_id = frame_id

    return {'frame_id': dest_frame_id, 'model_id': w2v_model.model_id}
    def on_participate(self, request):
        """Assign a client to an alternative of an experiment.

        Requires the query args ``experiment``, ``alternatives`` and
        ``client_id``; responds with JSON naming the selected
        alternative.
        """
        alternatives = request.args.getlist('alternatives')
        experiment_name = request.args.get('experiment')
        force = request.args.get('force')
        record_force = to_bool(request.args.get('record_force', 'false'))
        client_id = request.args.get('client_id')
        traffic_fraction = request.args.get('traffic_fraction')

        if traffic_fraction is not None:
            traffic_fraction = float(traffic_fraction)
        prefetch = to_bool(request.args.get('prefetch', 'false'))

        missing = (client_id is None or experiment_name is None
                   or alternatives is None)
        if missing:
            return json_error({'message': 'missing arguments'}, request, 400)

        dt = None
        if request.args.get("datetime"):
            dt = dateutil.parser.parse(request.args.get("datetime"))

        try:
            if should_exclude_visitor(request):
                # Excluded visitors are pinned to the winner (or control).
                experiment = Experiment.find(experiment_name, redis=self.redis)
                alternative = (experiment.winner
                               if experiment.winner is not None
                               else experiment.control)
            else:
                alternative = participate(experiment_name,
                                          alternatives,
                                          client_id,
                                          force=force,
                                          record_force=record_force,
                                          traffic_fraction=traffic_fraction,
                                          prefetch=prefetch,
                                          datetime=dt,
                                          redis=self.redis)
        except ValueError as e:
            return json_error({'message': str(e)}, request, 400)

        return json_success({
            'alternative': {'name': alternative.name},
            'experiment': {'name': alternative.experiment.name},
            'client_id': client_id,
            'status': 'ok',
        }, request)
Exemple #8
0
    def on_participate(self, request):
        """Assign a client to an alternative of an experiment.

        Requires the query args ``experiment``, ``alternatives`` and
        ``client_id``; optional args tune forcing, traffic fraction,
        prefetching and the event datetime.  Responds with JSON naming
        the chosen alternative.
        """
        alts = request.args.getlist('alternatives')
        experiment_name = request.args.get('experiment')
        force = request.args.get('force')
        record_force = to_bool(request.args.get('record_force', 'false'))
        client_id = request.args.get('client_id')
        traffic_fraction = request.args.get('traffic_fraction')

        if traffic_fraction is not None:
            traffic_fraction = float(traffic_fraction)
        prefetch = to_bool(request.args.get('prefetch', 'false'))

        # All three are mandatory; otherwise respond with HTTP 400.
        if client_id is None or experiment_name is None or alts is None:
            return json_error({'message': 'missing arguments'}, request, 400)

        dt = None
        if request.args.get("datetime"):
            dt = dateutil.parser.parse(request.args.get("datetime"))
        try:
            if should_exclude_visitor(request):
                # Excluded visitors never enroll: serve the winner if the
                # experiment has one, otherwise the control alternative.
                exp = Experiment.find(experiment_name, redis=self.redis)
                if exp.winner is not None:
                    alt = exp.winner
                else:
                    alt = exp.control
            else:
                alt = participate(experiment_name, alts, client_id,
                                  force=force, record_force=record_force,
                                  traffic_fraction=traffic_fraction,
                                  prefetch=prefetch, datetime=dt, redis=self.redis)
        except ValueError as e:
            return json_error({'message': str(e)}, request, 400)

        resp = {
            'alternative': {
                'name': alt.name
            },
            'experiment': {
                'name': alt.experiment.name,
            },
            'client_id': client_id,
            'status': 'ok'
        }

        return json_success(resp, request)
Exemple #9
0
def index():
    """
    Render the index page listing every election replay directory.

    Will match directories named like the following:
    2015-09-10
    09-10-2015

    Replay state is read from environment variables keyed by the
    election date.
    """
    context = utils.build_context()
    context['elections'] = []

    date_re = re.compile(r'(\d{2,4}[-]\d{2,4}[-]\d{2,4})')
    dirnames = [p.split('/')[-1] for p in glob.glob('%s/*' % DATA_DIR)]
    for e in sorted(n for n in dirnames if date_re.match(n)):
        key = 'AP_DEJAVU_%s' % e
        context['elections'].append({
            'election_date': e,
            'position': int(os.environ.get(key + '_POSITION', '0')),
            'total_positions': len(glob.glob('%s%s/*' % (DATA_DIR, e))),
            'playback': int(os.environ.get(key + '_PLAYBACK', '1')),
            'errormode': utils.to_bool(os.environ.get(key + '_ERRORMODE', 'False')),
            'ratelimited': utils.to_bool(os.environ.get(key + '_RATELIMITED', 'False')),
        })
    return render_template('index.html', **context)
Exemple #10
0
def status(year, election_date):
    """
    The route /<election_date>/status will return the status of a given
    election date test, including the current position in the hopper, the
    playback speed, and the path of the file that will be served at the current
    position.

    NOTE(review): ``year`` is accepted (presumably bound by the URL route)
    but never used in the body — confirm against the route definition.
    """

    if request.args.get('national', None):
        LEVEL = 'local'
        # ?national=true selects the national-level recordings.
        if request.args['national'].lower() == 'true':
            LEVEL = 'national'
    else:
        # The level is mandatory; refuse to guess.
        return json.dumps({
            'error':
            True,
            'message':
            'must specify national=true or national=false'
        })

    election_key = 'AP_DEJAVU_%s' % election_date

    # Identity sort key: plain lexicographic ordering of the file paths.
    hopper = sorted(glob.glob('%s%s/%s/*' % (DATA_DIR, election_date, LEVEL)),
                    key=lambda x: x)

    # Replay state is read from r_conn; fall back to defaults when unset.
    position = int(r_conn.get(election_key + '_POSITION') or 0)
    playback = int(r_conn.get(election_key + '_PLAYBACK') or 1)
    errormode = utils.to_bool(
        r_conn.get(election_key + '_ERRORMODE') or 'False')
    ratelimited = utils.to_bool(
        r_conn.get(election_key + '_RATELIMITED') or 'False')

    return json.dumps({
        'playback': playback,
        'position': position,
        'errormode': errormode,
        'ratelimited': ratelimited,
        'file': hopper[position - 1],
        'level': LEVEL
    })
Exemple #11
0
def execute(h2o, params, config):
    """
    Sort an H2O frame by a single column and store the result.

    ``params['column']`` names the sort key and ``params['ascending']``
    (parsed with to_bool) picks the direction.  The sorted frame is
    assigned to a new id derived from ``params['suffix']``.
    """
    frame_id = config.get('frame_id')
    frame = h2o.get_frame(frame_id)

    sort_column = params.get('column')
    sort_ascending = to_bool(params.get('ascending'))
    sorted_frame = frame.sort(by=[sort_column], ascending=[sort_ascending])

    dest_frame_id = append_frame_id(frame_id, params.get('suffix'))
    h2o.assign(sorted_frame, dest_frame_id)

    return {'frame_id': dest_frame_id}
Exemple #12
0
 def add_template(self):
     """Add a new template in JSON format to `pandoc_templates.json`.

     Parses ``self.arg`` as a ``key<DELIMITER>value`` pair and stores
     the template field the key selects: 'Name' (stored as-is),
     'Defaults' (coerced to a bool) or 'Command' (cleaned first).
     Unrecognized keys are silently ignored.
     """
     if DELIMITER in self.arg:
         key, value = self._parse_query(self.arg)
         if key == 'Name':
             self._store_template_info('name', value)
         elif key == 'Defaults':
             self._store_template_info('use_defaults',
                                       utils.to_bool(value))
         elif key == 'Command':
             self._store_template_info('options',
                                       self._parse_template(value))
     return 'Template successfully created!'
Exemple #13
0
 def add_template(self):
     """Add a new template in JSON format to `pandoc_templates.json`.

     Parses ``self.arg`` as a ``key<DELIMITER>value`` pair and stores
     the piece of template info the key selects: 'Name' (as-is),
     'Defaults' (coerced to bool) or 'Command' (cleaned via
     `_parse_template`).  Other keys are silently ignored.
     """
     if DELIMITER in self.arg:
         key, value = self._parse_query(self.arg)
         if key == 'Name':
             self._store_template_info('name', value)
         elif key == 'Defaults':
             bool_v = utils.to_bool(value)
             self._store_template_info('use_defaults', bool_v)
         elif key == 'Command':
             clean_cmd = self._parse_template(value)
             self._store_template_info('options', clean_cmd)
     return 'Template successfully created!'
Exemple #14
0
def status(election_date):
    """
    Return JSON describing the replay state for ``election_date``:
    hopper position, playback speed, error/rate-limit flags, and the
    path of the file served at the current position.  State is read
    from environment variables keyed by the election date.
    """
    election_key = 'AP_DEJAVU_%s' % election_date

    # Order recordings by the token following 'recording-' in the name.
    recordings = glob.glob('%s%s/*' % (DATA_DIR, election_date))
    hopper = sorted(recordings, key=lambda name: name.split('recording-')[1])

    position = int(os.environ.get(election_key + '_POSITION', '0'))
    playback = int(os.environ.get(election_key + '_PLAYBACK', '1'))
    errormode = utils.to_bool(os.environ.get(election_key + '_ERRORMODE', 'False'))
    ratelimited = utils.to_bool(os.environ.get(election_key + '_RATELIMITED', 'False'))

    state = {
        'playback': playback,
        'position': position,
        'errormode': errormode,
        'ratelimited': ratelimited,
        'file': hopper[position - 1],
    }
    return json.dumps(state)
Exemple #15
0
def index(year):
    """
    Will match directories named like the following:
    2015-09-10
    09-10-2015

    Builds one entry per election (this variant exposes only the
    'national' level) with replay state read from r_conn, then renders
    the index template.
    """
    context = utils.build_context()
    context['elections'] = []
    context['year'] = year
    elections = sorted([
        a.split('/')[-1] for a in glob.glob('%s/*' % DATA_DIR)
        if re.match('(\d{2,4}[-]\d{2,4}[-]\d{2,4})',
                    a.split('/')[-1])
    ],
                       key=lambda x: x)  # identity key: lexicographic sort

    for e in elections:
        # Single-element loop keeps the structure of the local/national
        # variant of this view while exposing only 'national'.
        for level in ['national']:
            national = True
            e_dict = {}
            election_key = 'AP_DEJAVU_%s' % e
            e_dict['election_date'] = e
            e_dict['national'] = national
            e_dict['level'] = level
            e_dict['title'] = "%s [%s]" % (e, level)
            e_dict['position'] = int(
                r_conn.get(election_key + '_POSITION') or 0)
            e_dict['total_positions'] = len(
                glob.glob('%s%s/%s/*' % (DATA_DIR, e, level)))
            e_dict['playback'] = int(
                r_conn.get(election_key + '_PLAYBACK') or 1)
            e_dict['errormode'] = utils.to_bool(
                r_conn.get(election_key + '_ERRORMODE') or 'False')
            e_dict['ratelimited'] = utils.to_bool(
                r_conn.get(election_key + '_RATELIMITED') or 'False')
            context['elections'].append(e_dict)
    return render_template('index.html', **context)
Exemple #16
0
def execute(h2o, params, config):
    """
    Train an H2O Generalized Low Rank Model (GLRM) on a frame.

    Optionally skips the first ``column_header`` rows before training.
    Every hyper-parameter arrives as a string in ``params`` and is
    coerced to the type the estimator expects.  The trained model is
    persisted via ``save_model``; its id is returned alongside the
    unchanged input frame id.
    """
    frame_id = config.get('frame_id')

    df = h2o.get_frame(frame_id)
    column_header = params.get('column_header')
    if len(column_header) > 0:
        # Drop the first int(column_header) rows before training.
        df = df[int(column_header):]

    from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
    glrm_model = H2OGeneralizedLowRankEstimator(
        expand_user_y=to_bool(params.get('expand_user_y')),
        gamma_x=float(params.get('gamma_x')),
        gamma_y=float(params.get('gamma_y')),
        ignore_const_cols=to_bool(params.get('ignore_const_cols')),
        impute_original=to_bool(params.get('impute_original')),
        init=str(params.get('init')),
        init_step_size=float(params.get('init_step_size')),
        k=int(params.get('k')),
        loss=str(params.get('loss')),
        max_iterations=int(params.get('max_iterations')),
        max_runtime_secs=float(params.get('max_runtime_secs')),
        max_updates=int(params.get('max_updates')),
        min_step_size=float(params.get('min_step_size')),
        multi_loss=str(params.get('multi_loss')),
        period=int(params.get('period')),
        recover_svd=to_bool(params.get('recover_svd')),
        regularization_x=str(params.get('regularization_x')),
        regularization_y=str(params.get('regularization_y')),
        score_each_iteration=to_bool(params.get('score_each_iteration')),
        seed=int(params.get('seed')),
        svd_method=str(params.get('svd_method')))
    glrm_model.train(training_frame=df)
    glrm_model.show()
    save_model(params, glrm_model.model_id)

    return {'frame_id': frame_id, 'model_id': glrm_model.model_id}
Exemple #17
0
def update_mlt_params(user_mlt_params, params):
    """
    Copy user-supplied more-like-this (MLT) parameters into ``params``,
    coercing each value to the type the query expects.

    Keys not in USER_MLT_PARAMS are skipped; blank values are stored as
    None so Elasticsearch falls back to its defaults.
    """
    for k, v in user_mlt_params.items():
        if k not in USER_MLT_PARAMS:
            continue
        if not v.strip():
            v = None  # use ES defaults
        elif k == 'fields' or k == 'stop_words':
            # NOTE(review): `map(string.strip, ...)` is Python 2 style; under
            # Python 3 this would need e.g. [s.strip() for s in v.split(',')].
            v = map(string.strip, v.split(','))
        elif k == 'analyzer':
            pass
        elif k == 'boost_terms':
            # Bug fix: the original `k in ('boost_terms')` tested substring
            # membership in a string (the parentheses made no tuple), so keys
            # like 'boost' or 'terms' would also have been coerced to float.
            v = float(v)
        elif k == 'include':
            v = utils.to_bool(v)
        elif k == 'minimum_should_match':
            v = str(v)
        else:
            v = int(v)
        params[k] = v
Exemple #18
0
def execute(h2o, params, config):
    """
    Compute a correlation matrix for (a subset of) an H2O frame.

    ``columns`` may be a JSON list restricting the frame; ``na_rm``,
    ``use`` and ``method`` are forwarded to ``cor``.  The resulting
    matrix is stored under a new frame id built from ``suffix``.
    """
    frame_id = config.get('frame_id')
    frame = h2o.get_frame(frame_id)

    columns = params.get('columns')
    # A meaningful JSON list is longer than '[]', hence the len > 2 check.
    if columns is not None and len(columns) > 2:
        frame = frame[json.loads(columns)]

    use_value = params.get('use')
    if use_value is not None and len(use_value) == 0:
        use_value = None  # empty string means "not specified"

    correlation = frame.cor(na_rm=to_bool(params.get('na_rm')),
                            use=use_value,
                            method=params.get('method'))

    dest_frame_id = append_frame_id(frame_id, params.get('suffix'))
    h2o.assign(correlation, dest_frame_id)

    return {'frame_id': dest_frame_id}
Exemple #19
0
def notify_failure(container: Container):
    """
    Email the configured receivers when *container*'s healthcheck fails.

    Skips sending when the container's FAILURE_NOTIFY label is falsy, or
    when a notification for this container already went out within the
    configured timeout (minutes).
    """
    if not to_bool(container.labels.get(FAILURE_NOTIFY)):
        log(f"<8133c8b6> ({container.name}) Skip send container failure message because "
            + f"of 'failure_notify'={container.labels.get(FAILURE_NOTIFY)}")
        return
    time = datetime.now(timezone.utc)
    # The most recent healthcheck log entry supplies the failure details.
    failure_time = container.attrs["State"]["Health"]["Log"][-1]["End"]
    healthcheck_response = container.attrs["State"]["Health"]["Log"][-1][
        "Output"]
    if type(healthcheck_response) is str:
        healthcheck_response = healthcheck_response.strip()
    latest_send = None
    # Most recent email previously sent for this container, if any.
    container_sends = list(
        sorted(filter(lambda x: x.container_name == container.name,
                      sended_emails),
               key=lambda x: x.send_time))
    if len(container_sends) > 0:
        latest_send = container_sends[-1]
    send_timeout = config.default_send_timeout_min
    if FAILURE_NOTIFY_TIMEOUT in container.labels.keys():
        send_timeout = int(container.labels[FAILURE_NOTIFY_TIMEOUT])
    # Send only if no prior email exists or the timeout has elapsed.
    if not latest_send or (time -
                           latest_send.send_time).seconds / 60 > send_timeout:
        default_address = config.default_receiver_address
        addresses = [default_address]
        if FAILURE_NOTIFY_EMAIL in container.labels.keys():
            addresses = container.labels[FAILURE_NOTIFY_EMAIL].split(',')
        log(f"<ad25da5b> ({container.name}) Send container failure message to: {addresses}"
            )
        for address in addresses:
            send_email(
                Email(address=address,
                      container_name=container.name,
                      failure_time=failure_time,
                      healthcheck_response=healthcheck_response))
    else:
        # NOTE(review): this log tag duplicates the early-return branch's
        # <8133c8b6> — looks like a copy-paste; confirm intended tag.
        log(f"<8133c8b6>  ({container.name}) Skip send container failure message because "
            + f"of send timeout: {send_timeout} minutes")
Exemple #20
0
 def create(self, request):
     """
     POST

     Insert the evaluation panels ("tribunales") derived from a
     committee ("comision").

     :param request: incoming request; parameters are extracted with
         utils.get_params(request).
     :return: Response whose body is
         {status: True/False, data: {data of the inserted committee or
         of all committees}}
     """
     try:
         params = utils.get_params(request)
         self.logger.info('INICIO WS - COMISIONEVALUACIONVIEW POST del usuario: %s con parametros: %s' %
                          (request.user.email if hasattr(request.user, 'email') else request.user.username, params))
         # Only users with the committee-creation permission (or admins)
         # may proceed; everyone else gets 405.
         if request.user.has_perm('comisiones_evaluacion.comision.create') or request.user.is_admin:
             comisiones = utils.to_bool(params.get('comisiones'))
             comision = Comision(request.user, params.get('convocatoria'), params.get('anio'),
                                 params.get('titulacion'), comisiones=comisiones)
             if not comisiones:
                 comision.tutores_comisiones()
             resul = comision.asig_tfgs()
             # Retry the assignment while the Comision instance requests it.
             while comision.reintentar:
                 comision = Comision(request.user, params.get('convocatoria'), comisiones=comisiones)
                 comision.asig_tfgs()
             if resul['status']:
                 resul_status = status.HTTP_200_OK
             else:
                 resul = dict(message=resul['message'])
                 resul_status = status.HTTP_400_BAD_REQUEST
         else:
             resul = dict(message="Sin privilegios")
             resul_status = status.HTTP_405_METHOD_NOT_ALLOWED
         self.logger.info('FIN WS - COMISIONEVALUACIONVIEW POST del usuario: %s con resultado: %s' %
                          (request.user.email if hasattr(request.user, 'email') else request.user.username, resul))
         return Response(resul, status=resul_status)
     except Exception as e:
         # Any failure is logged and mapped to a generic 400 response.
         resul = dict(status=False, message="Error en la llamada")
         self.logger.critical('COMISIONEVALUACIONVIEW POST: %s %s' % (resul, e))
         return Response(resul, status=status.HTTP_400_BAD_REQUEST)
Exemple #21
0
def execute(h2o, params, config):
    """
    Train an H2O K-Means model on (a subset of the columns of) a frame.

    ``input_columns`` may be a JSON list of column names; when absent or
    empty, every column is used.  All hyper-parameters arrive as strings
    in ``params`` and are coerced to the types the estimator expects.
    The trained model is persisted via ``save_model``.
    """
    frame_id = config.get('frame_id')

    df = h2o.get_frame(frame_id)

    input_columns = params.get("input_columns")
    if input_columns is None or len(input_columns) == 0:
        # Default: cluster on every column of the frame.
        input_columns = df.col_names
    else:
        import json
        input_columns = json.loads(input_columns)

    from h2o.estimators import H2OKMeansEstimator
    kmeans_model = H2OKMeansEstimator(
        categorical_encoding=params.get("categorical_encoding"),
        estimate_k=to_bool(params.get("estimate_k")),
        fold_assignment=params.get("fold_assignment"),
        ignore_const_cols=to_bool(params.get("ignore_const_cols")),
        init=params.get("init"),
        k=int(params.get("k")),
        keep_cross_validation_fold_assignment=to_bool(
            params.get("keep_cross_validation_fold_assignment")),
        keep_cross_validation_models=to_bool(
            params.get("keep_cross_validation_models")),
        keep_cross_validation_predictions=to_bool(
            params.get("keep_cross_validation_predictions")),
        max_iterations=int(params.get("max_iterations")),
        max_runtime_secs=float(params.get("max_runtime_secs")),
        nfolds=int(params.get("nfolds")),
        score_each_iteration=to_bool(params.get("score_each_iteration")),
        seed=int(params.get("seed")),
        standardize=to_bool(params.get("standardize")))
    kmeans_model.train(x=input_columns, training_frame=df)
    kmeans_model.show()

    save_model(params, kmeans_model.model_id)

    return {'frame_id': frame_id, 'model_id': kmeans_model.model_id}
Exemple #22
0
 def get_body_data(self, body):
     """Populate correlation_id and success from a message *body* dict.

     'Success' is coerced with to_bool; 'CorrelationId' is stored as-is.
     """
     correlation = body.get('CorrelationId')
     success_flag = to_bool(body.get('Success'))
     self.correlation_id = correlation
     self.success = success_flag
Exemple #23
0
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError(
            'SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses': os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path': os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #24
0
    def __init__(self, arguments, model, train_fn, valid_fn,
                 train_sets, valid_sets):
        """Configure the trainer from an ``arguments`` dict.

        Parses learning-rate / halving / momentum settings (each with a
        default when absent), prepares the working directory, logs the
        effective configuration, saves the initial model, and reloads
        saved trainer state when ``resume`` is true.

        NOTE(review): Python 2 code (``has_key``, ``print >>``,
        ``StringIO``) — keep it on a Python 2 interpreter.
        """
        self.logger = logging.getLogger(__name__)

        self.model = model
        self.train_fn = train_fn
        self.valid_fn = valid_fn
        self.train_sets = train_sets
        self.valid_sets = valid_sets

        #################### parse configs ####################

        # Each setting below falls back to its default when the key is
        # absent from `arguments`.
        self.resume = False
        if "resume" in arguments:
            self.resume = utils.to_bool(arguments["resume"])

        self.wdir = arguments["wdir"]
        self.output_file = arguments["output_file"]

        self.learn_rate = 0.1
        if "learn_rate" in arguments:
            self.learn_rate = float(arguments["learn_rate"])

        self.halving_factor = 0.5
        if "halving_factor" in arguments:
            self.halving_factor = float(arguments["halving_factor"])
        self.max_iters = 20
        if "max_iters" in arguments:
            self.max_iters = int(arguments["max_iters"])
        self.min_iters = 0
        if "min_iters" in arguments:
            self.min_iters = int(arguments["min_iters"])
        self.keep_lr_iters = 15
        if "keep_lr_iters" in arguments:
            self.keep_lr_iters = int(arguments["keep_lr_iters"])
        self.start_halving_impr=0.01
        if "start_halving_impr" in arguments:
            self.start_halving_impr = float(arguments["start_halving_impr"])
        self.end_halving_impr=0.001
        if "end_halving_impr" in arguments:
            self.end_halving_impr = float(arguments["end_halving_impr"])

        self.continue_with_rate = False
        if "continue_with_rate" in arguments:
            self.continue_with_rate = utils.to_bool(arguments["continue_with_rate"])

        self.halving_criteria = "loss"
        if "halving_criteria" in arguments:
            self.halving_criteria = arguments["halving_criteria"]
        criteria_list = ["loss", "frame_err"]
        if self.halving_criteria not in criteria_list:
            raise Exception("invalid halving criteria. must be one of " + str(criteria_list))

        # batch_size and momentum
        self.batch_size=256
        if arguments.has_key('batch_size'):
            self.batch_size = int(arguments['batch_size'])

        self.momentum=0.5
        self.momentum_start = 1
        if arguments.has_key('momentum'):
            self.momentum = float(arguments['momentum'])
        if 'momentum_start' in arguments:
            self.momentum_start = int(arguments['momentum_start'])

        # other stuff
        # When resuming, the working directory must already exist;
        # otherwise create it (or warn that it is being reused).
        if self.resume:
            if not os.path.exists(self.wdir):
                raise Exception("wdir must exist if resume=True")
        else:
            if not os.path.exists(self.wdir):
                os.makedirs(self.wdir)
            else:
                self.logger.info("Directory already exists...")

        # Log the whole effective configuration as one block.
        out = StringIO.StringIO()
        print >>out, "\n********** Trainer **********"
        print >>out, "resume", self.resume
        print >>out, "wdir", self.wdir
        print >>out, "output_file", self.output_file
        print >>out, "learn_rate", self.learn_rate
        print >>out, "halving_factor", self.halving_factor
        print >>out, "max_iters", self.max_iters
        print >>out, "min_iters", self.min_iters
        print >>out, "keep_lr_iters", self.keep_lr_iters
        print >>out, "start_halving_impr", self.start_halving_impr
        print >>out, "end_halving_impr", self.end_halving_impr
        print >>out, "continue_with_rate", self.continue_with_rate
        print >>out, "halving_criteria", self.halving_criteria
        print >>out, "batch_size", self.batch_size
        print >>out, "momentum", self.momentum
        print >>out, "momentum_start", self.momentum_start
        self.logger.info(out.getvalue())

        self.mlp_init = self.wdir + "/mlp_init"
        if not self.resume: # brand new
            save(self.model, self.mlp_init)

        # runtime state
        self.iter = 0
        self.done = False
        self.loss = sys.float_info.max
        self.rate = self.learn_rate
        self.mlp_best = self.mlp_init
        self.halving = False
        self.wasAccepted = True

        # Reload persisted trainer state when resuming a previous run.
        if self.resume:
            if os.path.isfile(self.wdir+"/trainer_state"):
                self._load_state()
Exemple #25
0
def replay(election_date):
    """
    Serve recorded election files for `/<election_date>` one at a time.

    Files under `/<DATA_DIR>/<election_date>/` are replayed in sorted
    order (ordered by the part of each filename after 'recording-').
    Replay state is persisted in environment variables keyed on the
    election date, so repeated requests step through the recordings.

    Optional query parameters:

    * `position`    -- jump the pointer to this index in the file list
      and return that file.
    * `playback`    -- advance the pointer by this many files on each
      subsequent request.
    * `errormode`   -- 'true'/'false': randomly answer about half of the
      requests with an HTTP 500.
    * `ratelimited` -- 'true'/'false': answer every request with a 403.

    Once the end of the list is reached, the final file is returned for
    every subsequent request until the state is reset (for example with
    `?position=0&playback=1`) or the app restarts.
    """
    election_key = 'AP_DEJAVU_%s' % election_date

    # Recordings sort by whatever follows 'recording-' in the filename.
    hopper = sorted(glob.glob('%s%s/*' % (DATA_DIR, election_date)),
                    key=lambda path: path.split('recording-')[1])

    # Current replay state, persisted between requests in the environment.
    position = int(os.environ.get(election_key + '_POSITION', '0'))
    playback = int(os.environ.get(election_key + '_PLAYBACK', '1'))
    errormode = utils.to_bool(os.environ.get(election_key + '_ERRORMODE', 'False'))
    ratelimited = utils.to_bool(os.environ.get(election_key + '_RATELIMITED', 'False'))

    # Hoist the control parameters once; each is None when absent.
    errormode_arg = request.args.get('errormode', None)
    ratelimited_arg = request.args.get('ratelimited', None)
    playback_arg = request.args.get('playback', None)
    position_arg = request.args.get('position', None)

    if errormode_arg:
        if errormode_arg == 'true':
            os.environ[election_key + '_ERRORMODE'] = 'True'
            errormode = True

        if errormode_arg == 'false':
            os.environ[election_key + '_ERRORMODE'] = 'False'
            errormode = False

    if ratelimited_arg:
        if ratelimited_arg == 'true':
            os.environ[election_key + '_RATELIMITED'] = 'True'
            ratelimited = True

        if ratelimited_arg == 'false':
            os.environ[election_key + '_RATELIMITED'] = 'False'
            ratelimited = False

    if playback_arg:
        try:
            playback = abs(int(playback_arg))
        except ValueError:
            return json.dumps({
                    'error': True,
                    'error_type': 'ValueError',
                    'message': 'playback must be an integer greater than 0.'
                })

    if position_arg:
        try:
            position = abs(int(position_arg))
        except ValueError:
            return json.dumps({
                    'error': True,
                    'error_type': 'ValueError',
                    'message': 'position must be an integer greater than 0.'
                })

    os.environ[election_key + '_PLAYBACK'] = str(playback)

    if ratelimited_arg or errormode_arg:
        # Toggling a failure mode only acknowledges the change.
        return json.dumps({"success": True})
    else:
        if ratelimited:
            return make_response((RATELIMITED_STRING, 403, RATELIMITED_HEADERS))

        if errormode:
            # Fail roughly half of the requests with a 500.
            if random.randrange(1, 3) % 2 == 0:
                return make_response(json.dumps({"status": 500, "error": True}), 500, ERRORMODE_HEADERS)

    if position + playback < (len(hopper) - 1):
        # When any control parameter was supplied, honor the requested
        # position exactly instead of advancing it by `playback`.
        if position_arg or playback_arg or ratelimited_arg or errormode_arg:
            os.environ[election_key + '_POSITION'] = str(position)
        else:
            os.environ[election_key + '_POSITION'] = str(position + playback)

    else:
        # End of the hopper: pin the pointer to the last file.
        os.environ[election_key + '_POSITION'] = str(len(hopper))

    with open(hopper[position - 1], 'r') as readfile:
        payload = str(readfile.read())

    return payload
Exemple #26
0
from dotenv import load_dotenv

from utils import get_required_env, to_bool, log

dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path)

default_send_timeout_min: int = int(os.getenv("DEFAULT_SEND_TIMEOUT_MIN", 15))
default_receiver_address: str = get_required_env("DEFAULT_RECEIVER_ADDRESS")
email_from: str = get_required_env("EMAIL_FROM")
email_host: str = get_required_env("EMAIL_HOST")
email_port: int = int(os.getenv("EMAIL_PORT", 465))
email_login: Optional[str] = os.getenv("EMAIL_LOGIN")
email_password: Optional[str] = os.getenv("EMAIL_PASSWORD")
email_enable_tls: bool = to_bool(os.getenv("EMAIL_ENABLE_TLS", False))
email_use_ssl: bool = to_bool(os.getenv("EMAIL_USE_SSL", False))
container_label: str = os.getenv("AUTOHEAL_CONTAINER_LABEL", "autoheal")
container_stop_timeout: int = int(
    os.getenv("AUTOHEAL_DEFAULT_STOP_TIMEOUT", 10))
container_interval: int = int(os.getenv("AUTOHEAL_INTERVAL", 5))
container_start_period: int = int(os.getenv("AUTOHEAL_START_PERIOD", 0))
container_debounce_time: int = int(os.getenv("AUTOHEAL_DEBOUNCE_TIME", 0))
clean_period: int = int(os.getenv("CLEAN_PERIOD", 24 * 60))
docker_base_url: str = os.getenv("DOCKER_BASE_URL",
                                 "unix://var/run/docker.sock")
docker_timeout: int = int(os.getenv("DOCKER_CONNECTION_TIMEOUT", 60))

log(f"""Configuration:
DEFAULT_SEND_TIMEOUT_MIN={default_send_timeout_min}
DEFAULT_RECEIVER_ADDRESS={default_receiver_address}
Exemple #27
0
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError(
            'SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "127.0.0.1"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses': os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path': os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #28
0
    def __init__(self,
                 mod,
                 title,
                 version,
                 doc,
                 usage_data=None,
                 avail_modes=None,
                 supported_ui_toolkits=None,
                 run_as_root_ok=False,
                 quiet=False):
        """Initialize tool metadata and pick the default run mode/UI toolkit.

        Stores the given metadata, configures logging, then selects
        self.default_mode (GUI / interactive / non-interactive) and
        self.default_ui_toolkit ('qt3'/'qt4'/'none') by reconciling the
        system-configured toolkit with the toolkits this tool supports
        and the toolkits actually installed. Exits via sys.exit(1) when
        only an unsupported toolkit is configured and no non-GUI
        fallback mode is available.

        Args:
            mod: tool/module name, used for the log module and messages.
            title: human-readable tool title.
            version: tool version string.
            doc: documentation/usage text.
            usage_data: optional usage data, stored verbatim.
            avail_modes: optional collection of run modes the tool offers.
            supported_ui_toolkits: optional collection of UI toolkits the
                tool supports (UI_TOOLKIT_QT3 / UI_TOOLKIT_QT4).
            run_as_root_ok: when False, warn if running as root.
            quiet: quiet-output flag, stored verbatim.
        """

        self.mod = mod
        self.title = title
        self.version = version
        self.doc = doc
        self.usage_data = usage_data
        # restrict group/other permissions on files created by this tool
        os.umask(0037)
        log.set_module(mod)
        self.args = []
        self.quiet = quiet
        self.lock_file = None
        prop.prog = sys.argv[0]

        # HPLIP_DEBUG in the environment turns on verbose logging
        if os.getenv("HPLIP_DEBUG"):
            log.set_level('debug')

        self.avail_modes = avail_modes
        if supported_ui_toolkits is not None:
            self.supported_ui_toolkits = supported_ui_toolkits
            self.num_supported_ui_toolkits = len(self.supported_ui_toolkits)
        else:
            self.supported_ui_toolkits = []
            self.num_supported_ui_toolkits = 0

        # system-configured preferred toolkit (defaults to qt4)
        self.default_ui_toolkit = sys_conf.get('configure', 'ui-toolkit',
                                               'qt4')

        # which toolkits were enabled at configure time
        self.num_installed_ui_toolkits = 0
        self.installed_ui_toolkits = []
        if utils.to_bool(sys_conf.get('configure', 'qt3', '0')):
            self.installed_ui_toolkits.append(UI_TOOLKIT_QT3)
            self.num_installed_ui_toolkits += 1

        if utils.to_bool(sys_conf.get('configure', 'qt4', '0')):
            self.installed_ui_toolkits.append(UI_TOOLKIT_QT4)
            self.num_installed_ui_toolkits += 1

        self.default_mode = INTERACTIVE_MODE

        # count the modes that can actually run in this environment
        self.num_valid_modes = 0
        if self.avail_modes is not None:
            if GUI_MODE in self.avail_modes and prop.gui_build and self.installed_ui_toolkits:
                self.num_valid_modes += 1

            if INTERACTIVE_MODE in self.avail_modes:
                self.num_valid_modes += 1

            if NON_INTERACTIVE_MODE in self.avail_modes:
                self.num_valid_modes += 1

        # initial default: prefer interactive over non-interactive
        if self.avail_modes is not None:
            if INTERACTIVE_MODE in self.avail_modes:
                self.default_mode = INTERACTIVE_MODE

            elif NON_INTERACTIVE_MODE in self.avail_modes:
                self.default_mode = NON_INTERACTIVE_MODE

        if self.supported_ui_toolkits is not None and prop.gui_build and self.installed_ui_toolkits:

            if self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT4 in self.supported_ui_toolkits and \
                UI_TOOLKIT_QT3 not in self.supported_ui_toolkits and INTERACTIVE_MODE in self.avail_modes:

                # interactive + qt4 and default is qt3 --> set to interactive (if avail) (e.g., hp-align)
                self.default_mode = INTERACTIVE_MODE
                self.default_ui_toolkit = 'none'

            elif (UI_TOOLKIT_QT4 in self.supported_ui_toolkits and self.default_ui_toolkit == 'qt4' and UI_TOOLKIT_QT4 in self.installed_ui_toolkits) or \
                 (UI_TOOLKIT_QT3 in self.supported_ui_toolkits and self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT3 in self.installed_ui_toolkits):

                # preferred toolkit is both supported and installed --> GUI
                self.default_mode = GUI_MODE

            elif self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT3 not in self.supported_ui_toolkits:

                if UI_TOOLKIT_QT4 in self.supported_ui_toolkits and UI_TOOLKIT_QT4 in self.installed_ui_toolkits:  # (e.g, hp-linefeedcal?)
                    self.default_ui_toolkit = 'qt4'
                    self.default_mode = GUI_MODE

                elif INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = INTERACTIVE_MODE

                elif NON_INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = NON_INTERACTIVE_MODE

                else:
                    log.error("%s cannot be run using Qt3 toolkit." % self.mod)
                    sys.exit(1)

            elif self.default_ui_toolkit == 'qt4' and UI_TOOLKIT_QT4 not in self.supported_ui_toolkits:

                if UI_TOOLKIT_QT3 in self.supported_ui_toolkits and UI_TOOLKIT_QT3 in self.installed_ui_toolkits:  # (e.g., hp-unload)
                    self.default_ui_toolkit = 'qt3'
                    self.default_mode = GUI_MODE

                elif INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = INTERACTIVE_MODE

                elif NON_INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = NON_INTERACTIVE_MODE

                else:
                    log.error("%s cannot be run using Qt4 toolkit." % self.mod)
                    sys.exit(1)

        self.mode = self.default_mode

        #log.debug("Default ui-toolkit: %s" % self.default_ui_toolkit)
        #log.debug("Default mode: %s" % self.default_mode)

        if os.getuid() == 0 and not run_as_root_ok:
            log.warn("%s should not be run as root/superuser." % mod)
Exemple #29
0
def replay(year, election_date):
    """
    Serve recorded AP election files for `/<year>/<election_date>`.

    Recordings live under `/<DATA_DIR>/<election_date>/<level>/`, where
    level is 'national' unless the request passes `national=false`
    (then 'local'). Files are replayed in sorted filename order, with
    replay state persisted in Redis (r_conn) keyed on the election date.

    Optional query parameters:

    * `position`    -- jump the pointer to this index in the file list
      and return that file.
    * `playback`    -- advance the pointer by this many files on each
      subsequent request.
    * `errormode`   -- 'true'/'false': randomly answer about half of the
      requests with an HTTP 500.
    * `ratelimited` -- 'true'/'false': answer every request with a 403.

    When the last file is reached it is returned for every subsequent
    request until the state is reset (e.g. `?position=0&playback=1`).
    NOTE(review): `year` is accepted for the route shape but unused here.
    """
    level = 'national'
    national_arg = request.args.get('national', None)
    if national_arg and national_arg.lower() == 'false':
        level = 'local'

    election_key = 'AP_DEJAVU_%s' % election_date

    # Plain sorted() orders recordings by full file path.
    hopper = sorted(glob.glob('%s%s/%s/*' % (DATA_DIR, election_date, level)))

    # Current replay state from Redis (with defaults when unset).
    position = int(r_conn.get(election_key + '_POSITION') or 0)
    playback = int(r_conn.get(election_key + '_PLAYBACK') or 1)
    errormode = utils.to_bool(r_conn.get(election_key + '_ERRORMODE') or 'False')
    ratelimited = utils.to_bool(r_conn.get(election_key + '_RATELIMITED') or 'False')

    # Hoist the control parameters once; each is None when absent.
    errormode_arg = request.args.get('errormode', None)
    ratelimited_arg = request.args.get('ratelimited', None)
    playback_arg = request.args.get('playback', None)
    position_arg = request.args.get('position', None)

    if errormode_arg:
        if errormode_arg == 'true':
            r_conn.set(election_key + '_ERRORMODE', 'True')
            errormode = True

        if errormode_arg == 'false':
            r_conn.set(election_key + '_ERRORMODE', 'False')
            errormode = False

    if ratelimited_arg:
        if ratelimited_arg == 'true':
            r_conn.set(election_key + '_RATELIMITED', 'True')
            ratelimited = True

        if ratelimited_arg == 'false':
            r_conn.set(election_key + '_RATELIMITED', 'False')
            ratelimited = False

    if playback_arg:
        try:
            playback = abs(int(playback_arg))
        except ValueError:
            return json.dumps({
                'error': True,
                'error_type': 'ValueError',
                'message': 'playback must be an integer greater than 0.'
            })

    if position_arg:
        try:
            position = abs(int(position_arg))
        except ValueError:
            return json.dumps({
                'error': True,
                'error_type': 'ValueError',
                'message': 'position must be an integer greater than 0.'
            })

    r_conn.set(election_key + '_PLAYBACK', str(playback))

    if ratelimited_arg or errormode_arg:
        # Toggling a failure mode only acknowledges the change.
        return json.dumps({"success": True})
    else:
        if ratelimited:
            return make_response(
                (RATELIMITED_STRING, 403, RATELIMITED_HEADERS))

        if errormode:
            # Fail roughly half of the requests with a 500.
            if random.randrange(1, 3) % 2 == 0:
                return make_response(
                    json.dumps({
                        "status": 500,
                        "error": True
                    }), 500, ERRORMODE_HEADERS)

    if position + playback < (len(hopper) - 1):
        # When any control parameter was supplied, honor the requested
        # position exactly instead of advancing it by `playback`.
        if position_arg or playback_arg or ratelimited_arg or errormode_arg:
            r_conn.set(election_key + '_POSITION', str(position))
        else:
            r_conn.set(election_key + '_POSITION', str(position + playback))

    else:
        # End of the hopper: pin the pointer to the last file.
        r_conn.set(election_key + '_POSITION', str(len(hopper)))

    with open(hopper[position - 1], 'r') as readfile:
        payload = str(readfile.read())

    return payload
Exemple #30
0
    def __init__(self,
                 inputsize,
                 taskcla,
                 use_processor=True,
                 processor_feats=(32, 128),
                 emb_size=100,
                 use_stem=1,
                 use_concat=False,
                 use_combination=True,
                 use_dropout=False,
                 gate="sigmoid"):
        """Build the multi-task network with optional stem and task processor.

        Args:
            inputsize: (channels, size, size) of the input images.
            taskcla: list of (task_id, n_classes) pairs; one head per task.
            use_processor: if truthy, build a task pre-processor; otherwise
                a fixed one-hot task embedding (task_eye) is used.
            processor_feats: (bottleneck, output) feature sizes of the
                pre-processor.
            emb_size: task embedding size; falls back to len(taskcla) when
                None or when the processor is disabled.
            use_stem: number of leading layers kept as plain (non-masked)
                stem layers, or None for none; must be < 5.
            use_concat: if truthy, concat input to the pre-processor
                (stored flag).
            use_combination: if truthy, generate attention masks through
                combination to save weights (stored flag).
            use_dropout: if truthy, apply dropout after the FC layers.
            gate: "sigmoid" or "tanh" gate used for the masks.

        Raises:
            ValueError: if use_stem is >= 5 (more than the 5 layers).
        """
        super(Net, self).__init__()

        # safety checks
        if use_stem is not None and use_stem >= 5:
            raise ValueError(
                "The value of use_stem ({}) is larger than the number of layers (5)!"
                .format(use_stem))

        # set internal values
        ncha, size, _ = inputsize
        self.taskcla = taskcla  # contains tasks with number of classes
        self.use_stem = use_stem  # number of stem layers to use (or None for none)
        # normalize the possibly string/int flags once, then consistently
        # use the normalized values everywhere below
        self.use_processor = utils.to_bool(use_processor)
        self.is_linear_processor = False if use_stem is None else use_stem > 3
        self.use_combination = utils.to_bool(use_combination)
        self.use_concat = utils.to_bool(use_concat)
        self.use_dropout = utils.to_bool(use_dropout)
        # BUGFIX: compare against the normalized flag (the old
        # `use_processor is False` test missed falsy non-bool values)
        self.emb_size = len(
            taskcla) if emb_size is None or not self.use_processor else emb_size

        # create all relevant convolutions (either native as stem or dwa masked)
        self.mask_layers = torch.nn.ModuleList()
        self.mask_shapes = []
        self.c1, s, psize1 = self._create_conv(ncha, 64, size // 8, size, 1,
                                               use_stem, inputsize)
        self.c2, s, psize2 = self._create_conv(64, 128, size // 10, s, 2,
                                               use_stem, psize1)
        self.c3, s, psize3 = self._create_conv(128, 256, 2, s, 3, use_stem,
                                               psize2)
        self.smid = s
        self.maxpool = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()

        # BUGFIX: check the normalized flag; the old `use_dropout is True`
        # test silently disabled dropout for truthy non-bool values
        # (e.g. "True" or 1) that utils.to_bool accepts
        if self.use_dropout:
            self.drop1 = torch.nn.Dropout(0.2)
            self.drop2 = torch.nn.Dropout(0.5)
        else:
            # identity layers keep the forward pass uniform without dropout
            self.drop1 = torch.nn.Identity()
            self.drop2 = torch.nn.Identity()
        self.fc1, psize4 = self._create_linear(256 * self.smid * self.smid,
                                               2048, 4, use_stem, psize3)
        self.fc2, psize5 = self._create_linear(2048, 2048, 5, use_stem, psize4)

        # define the names of the masks
        self.mask_names = [
            "c1.weight", "c2.weight", "c3.weight", "fc1.weight", "fc2.weight"
        ]
        if use_stem is not None:
            self.mask_names = self.mask_names[use_stem:]

        # generate task processor
        # all context processor stuff should start with 'p'
        # BUGFIX: use the normalized flag here as well (was `is True`)
        if self.use_processor:
            # params
            f_bn, f_out = processor_feats
            self.processor_size = psize5

            # adjust layers if input from FC
            if self.is_linear_processor:
                self.pfc1 = torch.nn.Linear(self.processor_size[0], f_bn)
                self.pfc2 = torch.nn.Linear(f_bn, f_out)
                self.pfc3 = torch.nn.Linear(f_out, self.emb_size)
            else:
                # check for input size and minimize
                if self.processor_size[1] >= 14:
                    self.pc_min = torch.nn.MaxPool2d(2)
                    c, w, h = self.processor_size
                    self.processor_size = (c, w // 2, h // 2)
                else:
                    self.pc_min = torch.nn.Identity()

                # compute processor
                self.pc1 = torch.nn.Conv2d(self.processor_size[0], f_bn,
                                           (1, 1), (1, 1), 0)
                self.pc2 = torch.nn.Conv2d(f_bn, f_out, (3, 3), (2, 2), 1)
                cin = int(np.ceil(self.processor_size[1] / 2))
                self.pfc1 = torch.nn.Linear(cin * cin * f_out, self.emb_size)
        else:
            # identity matrix serves as a fixed one-hot task embedding
            self.task_eye = torch.eye(len(taskcla), requires_grad=False).cuda()

        # generate all possible heads (list is needed for torch to properly
        # detect layers)
        self.last = torch.nn.ModuleList()
        for t, n in self.taskcla:
            self.last.append(torch.nn.Linear(2048, n))

        # gates for this approach
        if gate == "tanh":
            self.gate = torch.nn.Tanh()
        elif gate == "sigmoid":
            self.gate = torch.nn.Sigmoid()
        else:
            # BUGFIX: actually interpolate the gate name (the old message
            # printed a literal "{}") and fix the "unkown" typo
            print("ERROR: given gate {} is unknown, using sigmoid".format(gate))
            self.gate = torch.nn.Sigmoid()

        return
Exemple #31
0
    def __init__(self, mod, title, version, doc,
                 usage_data=None, avail_modes=None,
                 supported_ui_toolkits=None,
                 run_as_root_ok=False, quiet=False):
        """Initialize tool metadata and pick the default run mode/UI toolkit.

        Stores the given metadata, configures logging, then selects
        self.default_mode (GUI / interactive / non-interactive) and
        self.default_ui_toolkit ('qt3'/'qt4'/'none') by reconciling the
        system-configured toolkit with the toolkits this tool supports
        and the toolkits actually installed. Exits via sys.exit(1) when
        only an unsupported toolkit is configured and no non-GUI
        fallback mode is available.

        Args:
            mod: tool/module name, used for the log module and messages.
            title: human-readable tool title.
            version: tool version string.
            doc: documentation/usage text.
            usage_data: optional usage data, stored verbatim.
            avail_modes: optional collection of run modes the tool offers.
            supported_ui_toolkits: optional collection of UI toolkits the
                tool supports (UI_TOOLKIT_QT3 / UI_TOOLKIT_QT4).
            run_as_root_ok: when False, warn if running as root.
            quiet: quiet-output flag, stored verbatim.
        """

        self.mod = mod
        self.title = title
        self.version = version
        self.doc = doc
        self.usage_data = usage_data
        # restrict group/other permissions on files created by this tool
        os.umask(0037)
        log.set_module(mod)
        self.args = []
        self.quiet = quiet
        self.lock_file = None
        prop.prog = sys.argv[0]

        # HPLIP_DEBUG in the environment turns on verbose logging
        if os.getenv("HPLIP_DEBUG"):
            log.set_level('debug')

        self.avail_modes = avail_modes
        if supported_ui_toolkits is not None:
            self.supported_ui_toolkits = supported_ui_toolkits
            self.num_supported_ui_toolkits = len(self.supported_ui_toolkits)
        else:
            self.supported_ui_toolkits = []
            self.num_supported_ui_toolkits = 0

        # system-configured preferred toolkit (defaults to qt4)
        self.default_ui_toolkit = sys_conf.get('configure', 'ui-toolkit', 'qt4')

        # which toolkits were enabled at configure time
        self.num_installed_ui_toolkits = 0
        self.installed_ui_toolkits = []
        if utils.to_bool(sys_conf.get('configure', 'qt3', '0')):
            self.installed_ui_toolkits.append(UI_TOOLKIT_QT3)
            self.num_installed_ui_toolkits += 1

        if utils.to_bool(sys_conf.get('configure', 'qt4', '0')):
            self.installed_ui_toolkits.append(UI_TOOLKIT_QT4)
            self.num_installed_ui_toolkits += 1

        self.default_mode = INTERACTIVE_MODE

        # count the modes that can actually run in this environment
        self.num_valid_modes = 0
        if self.avail_modes is not None:
            if GUI_MODE in self.avail_modes and prop.gui_build and self.installed_ui_toolkits:
                self.num_valid_modes += 1

            if INTERACTIVE_MODE in self.avail_modes:
                self.num_valid_modes += 1

            if NON_INTERACTIVE_MODE in self.avail_modes:
                self.num_valid_modes += 1

        # initial default: prefer interactive over non-interactive
        if self.avail_modes is not None:
            if INTERACTIVE_MODE in self.avail_modes:
                self.default_mode = INTERACTIVE_MODE

            elif NON_INTERACTIVE_MODE in self.avail_modes:
                self.default_mode = NON_INTERACTIVE_MODE

        if self.supported_ui_toolkits is not None and prop.gui_build and self.installed_ui_toolkits:

            if self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT4 in self.supported_ui_toolkits and \
                UI_TOOLKIT_QT3 not in self.supported_ui_toolkits and INTERACTIVE_MODE in self.avail_modes:

                # interactive + qt4 and default is qt3 --> set to interactive (if avail) (e.g., hp-align)
                self.default_mode = INTERACTIVE_MODE
                self.default_ui_toolkit = 'none'

            elif (UI_TOOLKIT_QT4 in self.supported_ui_toolkits and self.default_ui_toolkit == 'qt4' and UI_TOOLKIT_QT4 in self.installed_ui_toolkits) or \
                 (UI_TOOLKIT_QT3 in self.supported_ui_toolkits and self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT3 in self.installed_ui_toolkits):

                # preferred toolkit is both supported and installed --> GUI
                self.default_mode = GUI_MODE

            elif self.default_ui_toolkit == 'qt3' and UI_TOOLKIT_QT3 not in self.supported_ui_toolkits:

                if UI_TOOLKIT_QT4 in self.supported_ui_toolkits and UI_TOOLKIT_QT4 in self.installed_ui_toolkits: # (e.g, hp-linefeedcal?)
                    self.default_ui_toolkit = 'qt4'
                    self.default_mode = GUI_MODE

                elif INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = INTERACTIVE_MODE

                elif NON_INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = NON_INTERACTIVE_MODE

                else:
                    log.error("%s cannot be run using Qt3 toolkit." % self.mod)
                    sys.exit(1)

            elif self.default_ui_toolkit == 'qt4' and UI_TOOLKIT_QT4 not in self.supported_ui_toolkits:

                if UI_TOOLKIT_QT3 in self.supported_ui_toolkits and UI_TOOLKIT_QT3 in self.installed_ui_toolkits: # (e.g., hp-unload)
                    self.default_ui_toolkit = 'qt3'
                    self.default_mode = GUI_MODE

                elif INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = INTERACTIVE_MODE

                elif NON_INTERACTIVE_MODE in self.avail_modes:
                    self.default_mode = NON_INTERACTIVE_MODE

                else:
                    log.error("%s cannot be run using Qt4 toolkit." % self.mod)
                    sys.exit(1)


        self.mode = self.default_mode

        #log.debug("Default ui-toolkit: %s" % self.default_ui_toolkit)
        #log.debug("Default mode: %s" % self.default_mode)

        if os.getuid() == 0 and not run_as_root_ok:
            log.warn("%s should not be run as root/superuser." % mod)
Exemple #32
0
    def __init__(self, arguments, model, train_sets):
        """Configure a layer-wise trainer from a dict of string arguments.

        Parses the configuration out of `arguments`, prepares the working
        directory (which must already exist when resuming), snapshots the
        initial model to <wdir>/mlp_init, and initializes the runtime
        training state. When resume is set and a saved state file exists,
        the previous trainer state is reloaded.

        Args:
            arguments: dict-like of string-valued options; recognized keys
                are "resume", "wdir", "output_file", "max_iters",
                "first_layer_to_train" and "last_layer_to_train"
                ("wdir" and "output_file" are required).
            model: network to train; must expose n_layers and be
                serializable via save().
            train_sets: training data sets, stored verbatim.

        Raises:
            Exception: if resume=True but the working directory is missing.
        """
        self.logger = logging.getLogger(__name__)

        self.model = model
        self.train_sets = train_sets

        #################### parse configs ####################

        # resume defaults to False unless explicitly requested
        self.resume = False
        if "resume" in arguments:
            self.resume = utils.to_bool(arguments["resume"])

        self.wdir = arguments["wdir"]
        self.output_file = arguments["output_file"]

        self.max_iters = 20
        if "max_iters" in arguments:
            self.max_iters = int(arguments["max_iters"])

        #self.max_iters_without_impr = 3
        #if "max_iters_without_impr" in arguments:
        #    self.max_iters_without_impr = int(arguments["max_iters_without_impr"])

        # range of layers to train, defaulting to the whole network
        self.first_layer_to_train = 0
        if "first_layer_to_train" in arguments:
            self.first_layer_to_train = int(arguments["first_layer_to_train"])

        self.last_layer_to_train = model.n_layers - 1  # number hidden layers - 1
        if "last_layer_to_train" in arguments:
            self.last_layer_to_train = int(arguments["last_layer_to_train"])

        # other stuff
        if self.resume:
            if not os.path.exists(self.wdir):
                raise Exception("wdir must exist if resume=True")
        else:
            if not os.path.exists(self.wdir):
                os.makedirs(self.wdir)
            else:
                self.logger.info("Directory already exists...")

        # log the effective configuration in one block
        out = StringIO.StringIO()
        print >> out, "\n********** LayerwiseTrainer **********"
        print >> out, "resume", self.resume
        print >> out, "wdir", self.wdir
        print >> out, "output_file", self.output_file
        print >> out, "max_iters", self.max_iters
        print >> out, "first_layer_to_train", self.first_layer_to_train
        print >> out, "last_layer_to_train", self.last_layer_to_train
        self.logger.info(out.getvalue())

        # snapshot of the untrained model; starting point for training
        self.mlp_init = self.wdir + "/mlp_init"
        if not self.resume:  # brand new
            save(self.model, self.mlp_init)

        # runtime state
        self.layer_index = self.first_layer_to_train
        self.iter = 0
        self.loss = sys.float_info.max
        self.mlp_best = self.mlp_init
        self.mlp_crrnt = self.mlp_init
        self.iters_without_impr = 0

        # restore the previous trainer state when resuming
        if self.resume:
            if os.path.isfile(self.wdir + "/layerwisetrainer_state"):
                self._load_state()
Exemple #33
0
    def __init__(self, dataset_args, n_ins):
        """Initialize a file-list dataset reader.

        Parses the configuration dict, loads optional feature-normalization
        stats, reads the list file of (feature, label) pairs, and allocates
        the chunk buffers used while reading.

        Parameters:
            dataset_args: dict of string-valued options. Required:
                "lst_file". Optional: "train_stat", "separate_lines",
                "has_labels", "file_format", "offset_labels", "gpu_chunk",
                "max_feats", "shuffle", "seed", "_split_id"/"_num_splits".
            n_ins: int, feature dimensionality (inputs per frame).

        Exits the process (sys.exit(1)) on a malformed list file; raises
        Exception when "_split_id" is given without "_num_splits".
        """

        # Optional global feature stats (mean / inverse std) used for
        # input normalization.
        self.mean = None
        self.std = None
        if 'train_stat' in dataset_args.keys():
            train_stat = dataset_args['train_stat']
            featureStats = stats.FeatureStats()
            featureStats.Load(train_stat)
            self.mean = featureStats.GetMean()
            self.std = featureStats.GetInvStd()

        # Open the list file, transparently handling gzip compression.
        file_path = dataset_args["lst_file"]
        if file_path.endswith('.gz'):
            file_read = gzip.open(file_path, 'r')
        else:
            file_read = open(file_path, 'r')

        # "separate_lines": feature file and label file appear on two
        # consecutive lines instead of space-separated on a single line.
        separate_lines = False
        if "separate_lines" in dataset_args:
            separate_lines = to_bool(dataset_args["separate_lines"])

        self.has_labels = True
        if "has_labels" in dataset_args:
            self.has_labels = to_bool(dataset_args["has_labels"])

        # Parse the list file into orig_file_lst, a list of
        # (featureFile, labelFile) pairs (labelFile is None when unlabeled).
        lines = [ln.strip() for ln in file_read]
        lines = [ln for ln in lines if ln != ""]
        file_read.close()  # BUGFIX: the list-file handle was never closed

        if self.has_labels:
            if separate_lines:
                if len(lines) % 2 != 0:
                    print("List has mis-matched number of feature files and label files")
                    sys.exit(1)
                self.orig_file_lst = []
                for i in xrange(0, len(lines), 2):
                    self.orig_file_lst.append((lines[i], lines[i+1]))
            else:
                self.orig_file_lst = []
                for i in xrange(len(lines)):
                    # Raw string for the regex per convention.
                    pair = re.compile(r"\s+").split(lines[i])
                    if len(pair) != 2:
                        print(lines[i])
                        print("Each line in the train and eval lists must contain feature file and label file separated by space character")
                        sys.exit(1)
                    self.orig_file_lst.append(pair)
        else:
            # No labels: pair every feature file with None.
            self.orig_file_lst = []
            for i in xrange(0, len(lines), 1):
                self.orig_file_lst.append((lines[i], None))

        # save arguments

        self.n_ins = n_ins

        # BUGFIX: dataset_args['file_format'] used to be read
        # unconditionally first (KeyError when the key was absent) and was
        # then immediately overwritten by this default anyway.
        self.file_format = "htk"
        if 'file_format' in dataset_args:
            self.file_format = dataset_args['file_format']

        self.offsetLabels = False
        if 'offset_labels' in dataset_args:
            self.offsetLabels = to_bool(dataset_args['offset_labels'])

        # Number of frames buffered per GPU chunk.
        self.chunk_size = 32768
        if 'gpu_chunk' in dataset_args:
            self.chunk_size = int(dataset_args['gpu_chunk'])

        # 0 means "no limit", implemented as sys.maxint.
        self.maxFeats = 0
        if "max_feats" in dataset_args:
            self.maxFeats = int(dataset_args["max_feats"])
        if self.maxFeats == 0:
            self.maxFeats = sys.maxint

        self.shuffle = True
        if 'shuffle' in dataset_args:
            self.shuffle = to_bool(dataset_args['shuffle'])

        self.seed = None
        if "seed" in dataset_args:
            self.seed = int(dataset_args["seed"])

        # _split_id and _num_splits must be given together (the sum of the
        # two membership tests is 1 exactly when only one is present).
        if int("_split_id" in dataset_args) + int("_num_splits" in dataset_args) == 1:
            raise Exception("_split_id must be used with _num_splits")
        self.num_splits = 0
        if "_num_splits" in dataset_args:
            # BUGFIX: was dataset_Args (NameError at runtime).
            self.num_splits = int(dataset_args["_num_splits"])
            self.split_id = dataset_args["_split_id"]

        # Internal state: chunk buffers and the RNG used for shuffling.
        self.split_parts = False
        self.by_matrix = False
        self.x = numpy.zeros((self.chunk_size, self.n_ins), dtype=numpy.float32)
        if self.has_labels:
            self.y = numpy.zeros((self.chunk_size,), dtype=numpy.int32)
        else:
            self.y = None
        self.numpy_rng = numpy.random.RandomState(self.seed)

        #self.make_shared()
        self.initialize_read()
Exemple #34
0
    def __init__(self, dataset_args, n_ins):
        """Initialize a file-list dataset reader.

        Parses the configuration dict, loads optional feature-normalization
        stats, reads the list file of (feature, label) pairs, and allocates
        the chunk buffers used while reading.

        Parameters:
            dataset_args: dict of string-valued options. Required:
                "lst_file". Optional: "train_stat", "separate_lines",
                "has_labels", "file_format", "offset_labels", "gpu_chunk",
                "max_feats", "shuffle", "seed", "_split_id"/"_num_splits".
            n_ins: int, feature dimensionality (inputs per frame).

        Exits the process (sys.exit(1)) on a malformed list file; raises
        Exception when "_split_id" is given without "_num_splits".
        """

        # Optional global feature stats (mean / inverse std) used for
        # input normalization.
        self.mean = None
        self.std = None
        if 'train_stat' in dataset_args.keys():
            train_stat = dataset_args['train_stat']
            featureStats = stats.FeatureStats()
            featureStats.Load(train_stat)
            self.mean = featureStats.GetMean()
            self.std = featureStats.GetInvStd()

        # Open the list file, transparently handling gzip compression.
        file_path = dataset_args["lst_file"]
        if file_path.endswith('.gz'):
            file_read = gzip.open(file_path, 'r')
        else:
            file_read = open(file_path, 'r')

        # "separate_lines": feature file and label file appear on two
        # consecutive lines instead of space-separated on a single line.
        separate_lines = False
        if "separate_lines" in dataset_args:
            separate_lines = to_bool(dataset_args["separate_lines"])

        self.has_labels = True
        if "has_labels" in dataset_args:
            self.has_labels = to_bool(dataset_args["has_labels"])

        # Parse the list file into orig_file_lst, a list of
        # (featureFile, labelFile) pairs (labelFile is None when unlabeled).
        lines = [ln.strip() for ln in file_read]
        lines = [ln for ln in lines if ln != ""]
        file_read.close()  # BUGFIX: the list-file handle was never closed

        if self.has_labels:
            if separate_lines:
                if len(lines) % 2 != 0:
                    print("List has mis-matched number of feature files and label files")
                    sys.exit(1)
                self.orig_file_lst = []
                for i in xrange(0, len(lines), 2):
                    self.orig_file_lst.append((lines[i], lines[i + 1]))
            else:
                self.orig_file_lst = []
                for i in xrange(len(lines)):
                    # Raw string for the regex per convention.
                    pair = re.compile(r"\s+").split(lines[i])
                    if len(pair) != 2:
                        print(lines[i])
                        print(
                            "Each line in the train and eval lists must contain feature file and label file separated by space character")
                        sys.exit(1)
                    self.orig_file_lst.append(pair)
        else:
            # No labels: pair every feature file with None.
            self.orig_file_lst = []
            for i in xrange(0, len(lines), 1):
                self.orig_file_lst.append((lines[i], None))

        # save arguments

        self.n_ins = n_ins

        # BUGFIX: dataset_args['file_format'] used to be read
        # unconditionally first (KeyError when the key was absent) and was
        # then immediately overwritten by this default anyway.
        self.file_format = "htk"
        if 'file_format' in dataset_args:
            self.file_format = dataset_args['file_format']

        self.offsetLabels = False
        if 'offset_labels' in dataset_args:
            self.offsetLabels = to_bool(dataset_args['offset_labels'])

        # Number of frames buffered per GPU chunk.
        self.chunk_size = 32768
        if 'gpu_chunk' in dataset_args:
            self.chunk_size = int(dataset_args['gpu_chunk'])

        # 0 means "no limit", implemented as sys.maxint.
        self.maxFeats = 0
        if "max_feats" in dataset_args:
            self.maxFeats = int(dataset_args["max_feats"])
        if self.maxFeats == 0:
            self.maxFeats = sys.maxint

        self.shuffle = True
        if 'shuffle' in dataset_args:
            self.shuffle = to_bool(dataset_args['shuffle'])

        self.seed = None
        if "seed" in dataset_args:
            self.seed = int(dataset_args["seed"])

        # _split_id and _num_splits must be given together (the sum of the
        # two membership tests is 1 exactly when only one is present).
        if int("_split_id" in dataset_args) + int("_num_splits" in dataset_args) == 1:
            raise Exception("_split_id must be used with _num_splits")
        self.num_splits = 0
        if "_num_splits" in dataset_args:
            # BUGFIX: was dataset_Args (NameError at runtime).
            self.num_splits = int(dataset_args["_num_splits"])
            self.split_id = dataset_args["_split_id"]

        # Internal state: chunk buffers and the RNG used for shuffling.
        self.split_parts = False
        self.by_matrix = False
        self.x = numpy.zeros((self.chunk_size, self.n_ins), dtype=numpy.float32)
        if self.has_labels:
            self.y = numpy.zeros((self.chunk_size,), dtype=numpy.int32)
        else:
            self.y = None
        self.numpy_rng = numpy.random.RandomState(self.seed)

        # self.make_shared()
        self.initialize_read()
Exemple #35
0
import yaml
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError('SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses':os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path':os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #36
0
        default=2500,
    )
    ap.add_argument("-st",
                    "--scroll_time",
                    help="How much time should I take to scroll?",
                    default=8)

    args = vars(ap.parse_args())
    print(args)

    # ---------------------------------------------------------
    # Global Variables
    # ---------------------------------------------------------

    # whether to download photos or not
    download_uploaded_photos = utils.to_bool(args["uploaded_photos"])
    download_friends_photos = utils.to_bool(args["friends_photos"])

    # whether to download the full image or its thumbnail (small size)
    # if small size is True then it will be very quick else if its false then it will open each photo to download it
    # and it will take much more time
    friends_small_size = utils.to_bool(args["friends_small_size"])
    photos_small_size = utils.to_bool(args["photos_small_size"])

    total_scrolls = int(args["total_scrolls"])
    scroll_time = int(args["scroll_time"])

    current_scrolls = 0
    old_height = 0

    driver = None
    def update(self, tfg, validated_data):
        """Apply a partial, field-by-field update to a ``tfg`` instance.

        Each recognized key in ``validated_data`` is validated and, on
        success, copied onto ``tfg``. Any validation failure raises
        NameError internally, which is caught below and returned as
        ``dict(status=False, message=...)``; on success the instance is
        saved and ``dict(status=True, data=<refetched Tfg>)`` is returned.

        NOTE(review): the bare ``except:`` clauses in this method swallow
        every exception type; consider narrowing them when this code is
        next touched.
        """
        try:
            # validate titulo: must be a non-empty string
            if 'titulo' in validated_data.keys():
                if validated_data.get('titulo') == '' or not utils.is_string(validated_data.get('titulo')):
                    raise NameError("Titulo incorrecto")
                else:
                    tfg.titulo = validated_data.get('titulo')
            # validate tipo: must be a non-empty string
            if 'tipo' in validated_data.keys():
                if validated_data.get('tipo') == '' or not utils.is_string(validated_data.get('tipo')):
                    raise NameError("Tipo incorrecto")
                else:
                    tfg.tipo = validated_data.get('tipo')

            # validate n_alumnos: number of students must be in 1..3
            if 'n_alumnos' in validated_data.keys():
                if ( int(validated_data.get('n_alumnos')) <= 0) or ( int(validated_data.get('n_alumnos')) > 3):
                    raise NameError("Numero de alumnos incorrecto")
                else:
                    tfg.n_alumnos = validated_data.get('n_alumnos')

            # validate descripcion: must be a non-empty string
            if 'descripcion' in validated_data.keys():
                if validated_data.get('descripcion') == '' or not utils.is_string(validated_data.get('descripcion')):
                    raise NameError("Descripcion incorrecta")
                else:
                    tfg.descripcion = validated_data.get('descripcion')

            # validate conocimientos_previos (prior knowledge): non-empty string
            if 'conocimientos_previos' in validated_data.keys():
                if validated_data.get('conocimientos_previos') == '' or \
                        not utils.is_string(validated_data.get('conocimientos_previos')):
                    raise NameError("Conocimientos Previos incorrectos")
                else:
                    tfg.conocimientos_previos = validated_data.get('conocimientos_previos')

            # validate hard_soft (hardware/software requirements): non-empty string
            if 'hard_soft' in validated_data.keys():
                if validated_data.get('hard_soft') == '' or not utils.is_string(validated_data.get('hard_soft')):
                    raise NameError("Hard/Soft incorrectos")
                else:
                    tfg.hard_soft = validated_data.get('hard_soft')

            # validate tutor: looked up by email; missing tutor is an error
            if 'tutor' in validated_data.keys():
                try:
                    tutor = Profesor.objects.get(email=validated_data.get('tutor'))
                except:
                    raise NameError('El tutor no existe')
                # if not isinstance(tutor, Profesor) or tutor.groups.filter(name='Profesores').exists():
                #     raise NameError("Tutor incorrecto")
                # else:
                tfg.tutor = tutor

            # validate cotutor: empty string clears it, otherwise look up by email
            if 'cotutor' in validated_data.keys():
                if validated_data.get('cotutor') != '':
                    try:
                        cotutor = Profesor.objects.get(email=validated_data.get('cotutor'))
                    except:
                        raise NameError("El Cotutor no existe")
                    # if not isinstance(cotutor, Profesor) or not cotutor.groups.filter(name='Profesores').exists():
                    #     raise NameError("Cotutor incorrecto")
                    # else:
                    tfg.cotutor = cotutor
                else:
                    tfg.cotutor = None

            # validate titulacion (degree): looked up by its codigo
            if 'titulacion' in validated_data.keys():
                try:
                    titulacion = Titulacion.objects.get(codigo=validated_data.get('titulacion'))
                except:
                    raise NameError('La Titulacion no existe')
                if not isinstance(titulacion, Titulacion):
                    raise NameError("Titulacion incorrecta")
                else:
                    tfg.titulacion = titulacion

            # validate publicado (published flag): boolean-like value required
            if 'publicado' in validated_data.keys():
                if validated_data.get('publicado') == '' or not utils.is_bool(validated_data.get('publicado')):
                    raise NameError("Valor Publicado invalido")
                else:
                    tfg.publicado = utils.to_bool(validated_data.get('publicado'))

            # validate validado (validated flag): boolean-like value required
            if 'validado' in validated_data.keys():
                if validated_data.get('validado') == '' or not utils.is_bool(validated_data.get('validado')):
                    raise NameError("Valor Validado invalido")
                else:
                    tfg.validado = utils.to_bool(validated_data.get('validado'))

            # validate asignado (assigned flag); clearing it also removes
            # the existing Tfg_Asig assignment record
            if 'asignado' in validated_data.keys():
                if validated_data.get('asignado') == '' or not utils.is_bool(validated_data.get('asignado')):
                    # NOTE(review): message says "Publicado" but this is the
                    # 'asignado' branch -- looks like a copy-paste slip;
                    # confirm before changing this user-facing text.
                    raise NameError("Valor Publicado invalido")
                else:
                    try:
                        tfg_asig = Tfg_Asig.objects.get(tfg=tfg)
                    except:
                        raise NameError('El Tfg asignado no existe')
                    # NOTE(review): truthiness test on the raw value -- a
                    # non-empty string such as 'False' is truthy here, unlike
                    # the utils.to_bool conversions used elsewhere; verify
                    # the intended semantics.
                    if validated_data.get('asignado'):
                        tfg.asignado = utils.to_bool(validated_data.get('asignado'))
                    else:
                        try:
                            # NOTE(review): `.delete` is attribute access,
                            # not a call -- presumably `.delete()` was meant;
                            # as written nothing is deleted. TODO confirm.
                            resul = self.serializer_class(tfg_asig).delete
                            tfg.asignado = utils.to_bool(validated_data.get('asignado'))
                        except:
                            raise NameError('Error al eliminar la asignacion del tfg')

            tfg.save()

            # Re-fetch by titulo -- assumes titulo is unique; TODO confirm.
            return dict(status=True, data=Tfg.objects.get(titulo=tfg.titulo))
        except NameError as e:
            # NOTE(review): e.message exists only on Python 2 exceptions;
            # on Python 3 this line would raise AttributeError.
            return dict(status=False, message=e.message)
Exemple #38
0
    def __init__(self,
                 model,
                 nepochs=200,
                 sbatch=32,
                 lr=0.075,
                 lr_min=1e-4,
                 lr_factor=3,
                 lr_patience=5,
                 warmup=[10, 750],
                 clipgrad=10000,
                 curriculum="linear:100:0.2",
                 log_path=None,
                 sparsity=0.2,
                 bin_sparsity=False,
                 alpha=1.0,
                 lamb_loss=[10, 0.05],
                 lamb_reg=500,
                 delta=1,
                 stiff=None,
                 use_anchor_first=False,
                 scale_att_loss=False,
                 use_task_loss=False,
                 use_apex=False):
        """Continual-learning trainer with sparsity/attention regularization.

        Forwards the generic training options to the base trainer, then
        stores the regularization settings (sparsity, alpha, delta, the
        loss/regularization lambdas) and the anchor-related constants.
        When ``use_task_loss`` is enabled, builds one "ideal" binary task
        embedding per task (a contiguous slice of ones) plus an MSE
        criterion against it.

        NOTE(review): ``warmup`` and ``lamb_loss`` are mutable default
        arguments; they are only read here, but callers should not mutate
        the defaults.
        """
        super().__init__(model, nepochs, sbatch, lr, lr_min, lr_factor,
                         lr_patience, warmup, clipgrad, curriculum, log_path,
                         AMP_READY and use_apex)

        # set parameters
        print(
            "Setting Parameters to:\n\tsparsity: {}{}\n\talpha: {}\n\tdelta: {}\n\tlambda: {} / {}\n\tanchor (first task): {}\n\tscale att: {}"
            .format(sparsity, " (bin)" if bin_sparsity is True else "", alpha,
                    delta, lamb_loss, lamb_reg, use_anchor_first,
                    scale_att_loss))
        self.sparsity = sparsity
        self.alpha = alpha
        # Normalize lamb_loss to a (start, end) pair.
        if isinstance(lamb_loss, list) or isinstance(lamb_loss, tuple):
            self.lamb_loss = lamb_loss
        else:
            self.lamb_loss = (lamb_loss, lamb_loss)
        self.lamb_reg = lamb_reg
        self.delta = delta
        self.bin_sparse = utils.to_bool(bin_sparsity)
        self.use_anchor_first = utils.to_bool(use_anchor_first)
        self.scale_attention = utils.to_bool(scale_att_loss)
        self.use_task_loss = utils.to_bool(use_task_loss)
        self.stiff = stiff

        # define constants used over training
        self.fisher = None
        self.anchor_neg = None
        self.anchor_pos = None
        self.anchor_task = None
        self.anchor_store = [None] * len(model.taskcla)

        # some anchor settings
        self.anchor_thres = 0.4  # complexity threshold for anchor data (not use to high complexity to avoid confusion)
        self.anchor_batches = 10  # number of batches to use for anchor training
        self.max_layers = 5

        # helper to improve time on sparsity
        self.sparsity_rates = {}

        # Generate the ideal per-task embeddings: task i owns the
        # contiguous slice [i*elements, (i+1)*elements) of the embedding,
        # set to 1, with everything else 0.
        if self.use_task_loss is True:
            print("INFO: Generating task loss")
            num_tasks = len(model.taskcla)
            emb_size = model.emb_size
            elements = emb_size / num_tasks
            ite = np.zeros((num_tasks, emb_size), np.float32)
            for i in range(num_tasks):
                # BUGFIX: was int((i + 1 * elements)), i.e. int(i + elements),
                # which gave every task a wrongly-placed slice; the intended
                # end of task i's segment is (i + 1) * elements.
                ite[i, int(i * elements):int((i + 1) * elements)] = 1
            self.ideal_task_embs = torch.from_numpy(ite).cuda()
            self.emb_mse_criterion = torch.nn.MSELoss(reduction='none')

        return
Exemple #39
0
def get_model(dataset):
    """Build a query index for *dataset* from the current request's args.

    Reads ``verbose``, ``explain`` and ``method`` from the query string,
    falling back to the module-level defaults, and delegates to
    ``models.QueryIndex.build_query_index``.
    """
    args = request.args
    return models.QueryIndex.build_query_index(
        dataset,
        verbose=utils.to_bool(args.get('verbose', VERBOSE)),
        explain=utils.to_bool(args.get('explain', EXPLAIN['enable'])),
        method=args.get('method', DEFAULT_METHOD),
    )
Exemple #40
0
import yaml
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError('SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'enable_whiplash': to_bool(os.environ.get('SIXPACK_CONFIG_WHIPLASH', 'False')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses':os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path':os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #41
0
import yaml
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError('SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '11024')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "pub-redis-11024.us-east-1-4.3.ec2.garantiadata.com"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', "Lsbtz4NIlAeKu5J5"),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '0')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses':os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path':os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #42
0
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError(
            'SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses': os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path': os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #43
0
    def __init__(self, arguments, model, train_fn, valid_fn, train_sets,
                 valid_sets):
        """Newbob-style trainer setup.

        Parses the configuration dict, prepares the working directory,
        logs the effective settings in one message, saves the initial
        model checkpoint and initializes the runtime state (optionally
        resuming from a previously saved trainer state).

        Parameters:
            arguments: dict of string-valued options; "wdir" and
                "output_file" are required, everything else has a default.
            model: the network to train (saved as the initial checkpoint).
            train_fn, valid_fn: compiled training / validation functions.
            train_sets, valid_sets: training / validation data providers.

        Raises Exception when "halving_criteria" is not one of
        ["loss", "frame_err"], or when resume=True but wdir is missing.
        """
        self.logger = logging.getLogger(__name__)

        self.model = model
        self.train_fn = train_fn
        self.valid_fn = valid_fn
        self.train_sets = train_sets
        self.valid_sets = valid_sets

        #################### parse configs ####################

        self.resume = False
        if "resume" in arguments:
            self.resume = utils.to_bool(arguments["resume"])

        self.wdir = arguments["wdir"]
        self.output_file = arguments["output_file"]

        self.learn_rate = 0.1
        if "learn_rate" in arguments:
            self.learn_rate = float(arguments["learn_rate"])

        self.halving_factor = 0.5
        if "halving_factor" in arguments:
            self.halving_factor = float(arguments["halving_factor"])
        self.max_iters = 20
        if "max_iters" in arguments:
            self.max_iters = int(arguments["max_iters"])
        self.min_iters = 0
        if "min_iters" in arguments:
            self.min_iters = int(arguments["min_iters"])
        self.keep_lr_iters = 15
        if "keep_lr_iters" in arguments:
            self.keep_lr_iters = int(arguments["keep_lr_iters"])
        self.start_halving_impr = 0.01
        if "start_halving_impr" in arguments:
            self.start_halving_impr = float(arguments["start_halving_impr"])
        self.end_halving_impr = 0.001
        if "end_halving_impr" in arguments:
            self.end_halving_impr = float(arguments["end_halving_impr"])

        self.continue_with_rate = False
        if "continue_with_rate" in arguments:
            self.continue_with_rate = utils.to_bool(
                arguments["continue_with_rate"])

        # Which validation metric drives learning-rate halving.
        self.halving_criteria = "loss"
        if "halving_criteria" in arguments:
            self.halving_criteria = arguments["halving_criteria"]
        criteria_list = ["loss", "frame_err"]
        if self.halving_criteria not in criteria_list:
            raise Exception("invalid halving criteria. must be one of " +
                            str(criteria_list))

        # batch_size and momentum
        # CONSISTENCY: use the "in" operator like the rest of this method
        # instead of the deprecated dict.has_key() (removed in Python 3).
        self.batch_size = 256
        if 'batch_size' in arguments:
            self.batch_size = int(arguments['batch_size'])

        self.momentum = 0.5
        self.momentum_start = 1
        if 'momentum' in arguments:
            self.momentum = float(arguments['momentum'])
        if 'momentum_start' in arguments:
            self.momentum_start = int(arguments['momentum_start'])

        # other stuff: create the working directory unless resuming
        if self.resume:
            if not os.path.exists(self.wdir):
                raise Exception("wdir must exist if resume=True")
        else:
            if not os.path.exists(self.wdir):
                os.makedirs(self.wdir)
            else:
                self.logger.info("Directory already exists...")

        # Log the full effective configuration as a single message.
        out = StringIO.StringIO()
        print >> out, "\n********** Trainer **********"
        print >> out, "resume", self.resume
        print >> out, "wdir", self.wdir
        print >> out, "output_file", self.output_file
        print >> out, "learn_rate", self.learn_rate
        print >> out, "halving_factor", self.halving_factor
        print >> out, "max_iters", self.max_iters
        print >> out, "min_iters", self.min_iters
        print >> out, "keep_lr_iters", self.keep_lr_iters
        print >> out, "start_halving_impr", self.start_halving_impr
        print >> out, "end_halving_impr", self.end_halving_impr
        print >> out, "continue_with_rate", self.continue_with_rate
        print >> out, "halving_criteria", self.halving_criteria
        print >> out, "batch_size", self.batch_size
        print >> out, "momentum", self.momentum
        print >> out, "momentum_start", self.momentum_start
        self.logger.info(out.getvalue())

        # Save the untrained model as the initial checkpoint.
        self.mlp_init = self.wdir + "/mlp_init"
        if not self.resume:  # brand new
            save(self.model, self.mlp_init)

        # runtime state
        self.iter = 0
        self.done = False
        self.loss = sys.float_info.max
        self.rate = self.learn_rate
        self.mlp_best = self.mlp_init
        self.halving = False
        self.wasAccepted = True

        if self.resume:
            if os.path.isfile(self.wdir + "/trainer_state"):
                self._load_state()
Exemple #44
0
import yaml
import os

from utils import to_bool

config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError('SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    CONFIG = {
        'enabled': to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port': int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host': os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password': os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix': os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout': os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name': os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections': int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        'redis_db': int(os.environ.get('SIXPACK_CONFIG_REDIS_DB', '15')),
        'robot_regex': os.environ.get('SIXPACK_CONFIG_ROBOT_REGEX', "$^|trivial|facebook|MetaURI|butterfly|google|"
                                                                    "amazon|goldfire|sleuth|xenu|msnbot|SiteUptime|"
                                                                    "Slurp|WordPress|ZIBB|ZyBorg|pingdom|bot|yahoo|"
                                                                    "slurp|java|fetch|spider|url|crawl|oneriot|abby|"
                                                                    "commentreader|twiceler"),
        'ignored_ip_addresses':os.environ.get('SIXPACK_CONFIG_IGNORE_IPS', "").split(","),
        'asset_path':os.environ.get('SIXPACK_CONFIG_ASSET_PATH', "gen"),
Exemple #45
0
from utils import to_bool

# NOTE(review): this variant references os and yaml without importing them;
# it presumably relies on imports earlier in the original file -- verify.
# Path to an optional YAML configuration file: when set, it fully defines
# CONFIG; otherwise CONFIG falls back to SIXPACK_CONFIG_* env variables.
config_path = os.environ.get('SIXPACK_CONFIG', None)
if config_path:
    try:
        CONFIG = yaml.safe_load(open(config_path, 'r'))
    except IOError:
        raise RuntimeError(
            'SIXPACK_CONFIG - {0} - is an invalid path'.format(config_path))
    # NOTE(review): Python 2-only except syntax; under Python 3 this must be
    # "except yaml.YAMLError as exc".
    except yaml.YAMLError, exc:
        raise RuntimeError('Error in configuration file: {0}'.format(str(exc)))
else:
    # Environment-driven defaults for the Redis-backed configuration.
    CONFIG = {
        'enabled':
        to_bool(os.environ.get('SIXPACK_CONFIG_ENABLED', 'True')),
        'redis_port':
        int(os.environ.get('SIXPACK_CONFIG_REDIS_PORT', '6379')),
        'redis_host':
        os.environ.get('SIXPACK_CONFIG_REDIS_HOST', "localhost"),
        'redis_password':
        os.environ.get('SIXPACK_CONFIG_REDIS_PASSWORD', None),
        'redis_prefix':
        os.environ.get('SIXPACK_CONFIG_REDIS_PREFIX', "sxp"),
        'redis_socket_timeout':
        os.environ.get('SIXPACK_CONFIG_REDIS_SOCKET_TIMEOUT', None),
        'redis_sentinel_service_name':
        os.environ.get('SIXPACK_CONFIG_REDIS_SENTINEL_SERVICE_NAME', None),
        'redis_max_connections':
        int(os.environ.get('SIXPACK_CONFIG_REDIS_MAX_CONNECTIONS', '0')),
        # NOTE(review): snippet truncated here -- the value for 'redis_db'
        # and the closing brace are missing from this chunk.
        'redis_db':
Exemple #46
0
    def update(self, tfg, validated_data):
        """Validate and apply the fields present in *validated_data* onto *tfg*.

        Only keys present in validated_data are touched.  String fields must
        be non-empty strings (utils.is_string), n_alumnos must be in 1..3,
        boolean fields must pass utils.is_bool, and tutor / cotutor /
        titulacion are resolved against the database by email / codigo.

        Returns:
            dict(status=True, data=<re-fetched Tfg>) on success, or
            dict(status=False, message=<error text>) on the first failed
            validation.
        """
        try:
            # validate titulo: non-empty string
            if 'titulo' in validated_data:
                if validated_data.get('titulo') == '' or not utils.is_string(
                        validated_data.get('titulo')):
                    raise NameError("Titulo incorrecto")
                tfg.titulo = validated_data.get('titulo')

            # validate tipo: non-empty string
            if 'tipo' in validated_data:
                if validated_data.get('tipo') == '' or not utils.is_string(
                        validated_data.get('tipo')):
                    raise NameError("Tipo incorrecto")
                tfg.tipo = validated_data.get('tipo')

            # validate n_alumnos: integer in 1..3
            if 'n_alumnos' in validated_data:
                if (int(validated_data.get('n_alumnos')) <= 0) or (int(
                        validated_data.get('n_alumnos')) > 3):
                    raise NameError("Numero de alumnos incorrecto")
                tfg.n_alumnos = validated_data.get('n_alumnos')

            # validate descripcion: non-empty string
            if 'descripcion' in validated_data:
                if validated_data.get(
                        'descripcion') == '' or not utils.is_string(
                            validated_data.get('descripcion')):
                    raise NameError("Descripcion incorrecta")
                tfg.descripcion = validated_data.get('descripcion')

            # validate conocimientos_previos: non-empty string
            if 'conocimientos_previos' in validated_data:
                if validated_data.get('conocimientos_previos') == '' or \
                        not utils.is_string(validated_data.get('conocimientos_previos')):
                    raise NameError("Conocimientos Previos incorrectos")
                tfg.conocimientos_previos = validated_data.get(
                    'conocimientos_previos')

            # validate hard_soft: non-empty string
            if 'hard_soft' in validated_data:
                if validated_data.get(
                        'hard_soft') == '' or not utils.is_string(
                            validated_data.get('hard_soft')):
                    raise NameError("Hard/Soft incorrectos")
                tfg.hard_soft = validated_data.get('hard_soft')

            # validate tutor: must reference an existing Profesor by email
            if 'tutor' in validated_data:
                try:
                    tutor = Profesor.objects.get(
                        email=validated_data.get('tutor'))
                except Exception:
                    raise NameError('El tutor no existe')
                tfg.tutor = tutor

            # validate cotutor: empty string clears it, otherwise it must
            # reference an existing Profesor by email
            if 'cotutor' in validated_data:
                if validated_data.get('cotutor') != '':
                    try:
                        cotutor = Profesor.objects.get(
                            email=validated_data.get('cotutor'))
                    except Exception:
                        raise NameError("El Cotutor no existe")
                    tfg.cotutor = cotutor
                else:
                    tfg.cotutor = None

            # validate titulacion: must reference an existing Titulacion by
            # codigo (objects.get already returns a Titulacion instance, so
            # the original redundant isinstance check was dropped)
            if 'titulacion' in validated_data:
                try:
                    titulacion = Titulacion.objects.get(
                        codigo=validated_data.get('titulacion'))
                except Exception:
                    raise NameError('La Titulacion no existe')
                tfg.titulacion = titulacion

            # validate publicado: boolean-like value
            if 'publicado' in validated_data:
                if validated_data.get('publicado') == '' or not utils.is_bool(
                        validated_data.get('publicado')):
                    raise NameError("Valor Publicado invalido")
                tfg.publicado = utils.to_bool(
                    validated_data.get('publicado'))

            # validate validado: boolean-like value
            if 'validado' in validated_data:
                if validated_data.get('validado') == '' or not utils.is_bool(
                        validated_data.get('validado')):
                    raise NameError("Valor Validado invalido")
                tfg.validado = utils.to_bool(
                    validated_data.get('validado'))

            # validate asignado: boolean-like value; setting it to False also
            # removes the existing assignment record
            if 'asignado' in validated_data:
                if validated_data.get('asignado') == '' or not utils.is_bool(
                        validated_data.get('asignado')):
                    # fix: the original raised "Valor Publicado invalido"
                    # here (copy/paste slip from the publicado branch)
                    raise NameError("Valor Asignado invalido")
                try:
                    tfg_asig = Tfg_Asig.objects.get(tfg=tfg)
                except Exception:
                    raise NameError('El Tfg asignado no existe')
                # fix: coerce before branching -- the raw value may be the
                # string 'False', which is truthy
                if utils.to_bool(validated_data.get('asignado')):
                    tfg.asignado = True
                else:
                    try:
                        # fix: the original referenced .delete without
                        # calling it, so the assignment was never removed
                        self.serializer_class(tfg_asig).delete()
                        tfg.asignado = False
                    except Exception:
                        raise NameError(
                            'Error al eliminar la asignacion del tfg')

            tfg.save()

            return dict(status=True, data=Tfg.objects.get(titulo=tfg.titulo))
        except NameError as e:
            # fix: e.message is Python 2-only; str(e) works on both
            return dict(status=False, message=str(e))
Exemple #47
0
import os
from flask import Flask
from flask import jsonify

from utils import to_bool

# Pod identity read from the environment (None when unset); presumably
# injected via the Kubernetes downward API -- verify in the deployment spec.
POD_NAME = os.environ.get("POD_NAME")
POD_IP = os.environ.get("POD_IP")
NODE_NAME = os.environ.get("NODE_NAME")

app = Flask(__name__)
# Pretty-print all jsonify() responses, not just ones viewed in a browser.
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = True

@app.route('/')
def meta():
    """Root endpoint: report this pod's identity as a JSON document."""
    payload = {
        'META': 'Here is some info about this pod',
        'POD_NAME': POD_NAME,
        'POD_IP': POD_IP,
        'NODE_NAME': NODE_NAME,
    }
    return jsonify(payload)

if __name__ == '__main__':
    # Listen on all interfaces; the DEBUG env var toggles Flask's debugger.
    debug_mode = to_bool(os.environ.get('DEBUG'))
    app.run(debug=debug_mode, host='0.0.0.0')
Exemple #48
0
    def __init__(self, arguments, model, train_sets):
        """Configure a layer-wise trainer from a dict of string arguments.

        arguments: dict of option name -> string value.  Recognised keys:
            resume, wdir, output_file, max_iters, first_layer_to_train,
            last_layer_to_train.
        model: the network to train layer by layer; must expose n_layers.
        train_sets: training data handle (stored for later use).
        """
        self.logger = logging.getLogger(__name__)

        self.model = model
        self.train_sets = train_sets

        #################### parse configs #################### 

        # Whether to resume a previous run from state saved in wdir.
        self.resume = False
        if "resume" in arguments:
            self.resume = utils.to_bool(arguments["resume"])

        self.wdir = arguments["wdir"]
        self.output_file = arguments["output_file"]

        # Maximum training iterations per layer.
        self.max_iters = 20
        if "max_iters" in arguments:
            self.max_iters = int(arguments["max_iters"])

        #self.max_iters_without_impr = 3
        #if "max_iters_without_impr" in arguments:
        #    self.max_iters_without_impr = int(arguments["max_iters_without_impr"])

        # Range of layer indices to train, inclusive on both ends.
        self.first_layer_to_train = 0
        if "first_layer_to_train" in arguments:
            self.first_layer_to_train = int(arguments["first_layer_to_train"])

        self.last_layer_to_train = model.n_layers - 1	# default: last hidden layer index
        if "last_layer_to_train" in arguments:
            self.last_layer_to_train = int(arguments["last_layer_to_train"])

        # Working directory must pre-exist when resuming; otherwise create it.
        if self.resume:
            if not os.path.exists(self.wdir):
                raise Exception("wdir must exist if resume=True")
        else:
            if not os.path.exists(self.wdir):
                os.makedirs(self.wdir)
            else:
                self.logger.info("Directory already exists...")

        # Log the effective configuration in one block (Python 2 print-to-file
        # syntax; StringIO buffers the lines).
        out = StringIO.StringIO()
        print >>out, "\n********** LayerwiseTrainer **********"
        print >>out, "resume", self.resume
        print >>out, "wdir", self.wdir
        print >>out, "output_file", self.output_file
        print >>out, "max_iters", self.max_iters
        print >>out, "first_layer_to_train", self.first_layer_to_train
        print >>out, "last_layer_to_train", self.last_layer_to_train
        self.logger.info(out.getvalue())

        # Snapshot the untrained model so each layer's training can restart
        # from a known initial state.
        self.mlp_init = self.wdir + "/mlp_init"
        if not self.resume: # brand new
            save(self.model, self.mlp_init)

        # runtime state
        self.layer_index = self.first_layer_to_train
        self.iter = 0
        self.loss = sys.float_info.max          # best loss seen so far
        self.mlp_best = self.mlp_init           # path of best model snapshot
        self.mlp_crrnt = self.mlp_init          # path of current model snapshot
        self.iters_without_impr = 0

        # When resuming, restore the runtime state saved by a previous run.
        if self.resume:
            if os.path.isfile(self.wdir+"/layerwisetrainer_state"):
                self._load_state()