예제 #1
0
def generate_file(context, output_path, template):
    """Render ``template`` into a text file located under ``output_path``.

    The file name comes from ``context["$file_name$"]`` when that key is
    present; otherwise the value returned by ``get_timestamp()`` is used.

    Args:
        context: Mapping handed to the template; may contain ``"$file_name$"``.
        output_path: Directory-like object supporting the ``/`` operator.
        template: Object exposing ``build_text_file(context, path)``.

    Returns:
        ``output_path / file_name``, i.e. the path given to the template.
    """
    name = (
        context["$file_name$"]
        if "$file_name$" in context
        else get_timestamp()
    )
    destination = output_path / name
    template.build_text_file(context, destination)
    return destination
예제 #2
0
 def _get_default_bindings(self):
     """Build the default bindings from the stored CVE dictionary.

     Keys missing from ``self._cve_dict`` yield ``None``, except ``cvss_v2``
     which falls back to 10.0 when it is absent or falsy. ``modified_date``
     is filled from ``get_timestamp()``.
     """
     cve = self._cve_dict
     bindings = {
         name: cve.get(name)
         for name in ('cve_id', 'description', 'cvss_v2', 'ecosystem')
     }
     if not bindings['cvss_v2']:
         bindings['cvss_v2'] = 10.0  # assume the worst
     bindings['modified_date'] = get_timestamp()
     return bindings
예제 #3
0
def cross_validate(train_files, test_files, model_name, exp, kfolds):
    """Run k-fold cross validation for one experiment and log the scores.

    Args:
        train_files: Iterable of corpus files; loaded once and split into
            ``kfolds`` folds.
        test_files: Iterable of corpus files for the test set. An empty
            iterable or ``None`` means no test set is loaded and ``None`` is
            passed to ``train_validate`` instead.
        model_name: Name of the model; asked for interactively when falsy.
        exp: Experiment number; asked for interactively when falsy.
        kfolds: Number of splits handed to ``KFold``.

    Returns:
        None. Results are logged and the detail table is copied to the
        clipboard via ``df.to_clipboard()``.
    """
    # Complete args
    model_name = (model_name if model_name else input('model_name=?')).lower()
    exp_num = 'exp_' + (str(exp) if exp else input('exp_?'))

    logger.info('---------- Cross validation {} on {} start ----------'.format(
        model_name, exp_num))

    # Load data. The file lists are materialised once so that a one-shot
    # iterable is not exhausted by the log call before the corpus is loaded.
    train_paths = list(train_files)
    logger.info('Loading training set: {}'.format(train_paths))
    train_df = load_corpus(train_paths)

    # Truthiness (rather than `!= ()`) also treats None and [] as "no files".
    test_paths = list(test_files) if test_files is not None else []
    if test_paths:
        logger.info('Loading test set: {}'.format(test_paths))
        test_df = load_corpus(test_paths)
    else:
        test_df = None

    # Load configs
    cfg_path = get_envar('CONFIG_PATH') + '/' + get_envar('BASE_CONFIG')
    logger.info('Loading base_configs from {}'.format(cfg_path))
    base_configs = read_config(cfg_path, obj_view=True)

    logger.info('Loading exp_configs on {} from {}'.format(
        exp_num, base_configs.exp_configs.path))
    exp_configs = read_config(base_configs.exp_configs.path,
                              obj_view=False)[exp_num]

    description = exp_configs['description']
    hyparams = exp_configs['hyperparams']
    logger.info('Experiment description: {}'.format(description.strip()))
    logger.info('Hyperparams: {}'.format(hyparams))

    # CV: fixed random_state keeps the fold assignment reproducible.
    kf = KFold(n_splits=kfolds, shuffle=True, random_state=42)

    cv = {}
    for k, (train_idx, val_idx) in enumerate(kf.split(train_df)):
        logger.info(f'-- Cross validation split {k+1} --')
        cv[f'CV_{k+1}'] = train_validate(
            model_name, hyparams, train_df.iloc[train_idx],
            train_df.iloc[val_idx], test_df)

    (_, cv_val, cv_test), df = best_scores(cv, complete=False)
    logger.info(
        f'**CV RESULTS** val_acc3={cv_val:.2%} test_acc3={cv_test:.2%}')
    df.to_clipboard()
    logger.info(f'CV details copied to clipboard \n{df}')
    logger.info('---------- Cross validation {} on {} end ----------'.format(
        model_name, exp_num))
예제 #4
0
    def prepare_payload(self):
        """Build the Gremlin request payload.

        Returns a dict with the script template under ``'gremlin'`` and, under
        ``'bindings'``, the ``cve_id`` read from ``self._cve_id_dict`` together
        with the value returned by ``get_timestamp()``.
        """
        now = get_timestamp()
        request = {'gremlin': cve_node_delete_script_template}
        request['bindings'] = {
            'cve_id': self._cve_id_dict.get('cve_id'),
            'timestamp': now,
        }
        return request
예제 #5
0
    def predict(self, path, plot=False):
        """Cluster the active segments of the recording at ``path``.

        The signal is loaded with ``mfcc=False``, activity is detected with
        ``zcr_vad``, each span returned by ``get_timestamp`` is encoded with
        ``self._encode_segment`` and the embeddings are clustered.

        Args:
            path: Location of the recording, forwarded to ``self._load``.
            plot: When truthy, ``self._plot_diarization`` is called with the
                signal, the spans and the cluster labels.

        Returns:
            A tuple ``(spans / self.sr, labels)``.
            NOTE(review): dividing by ``self.sr`` presumably converts sample
            indices to seconds -- confirm.
        """
        signal = self._load(path, mfcc=False)
        spans = get_timestamp(zcr_vad(signal))

        segments = []
        for span in spans:
            segments.append(self._encode_segment(signal, span))
        embeddings = torch.cat(segments).cpu().numpy()

        clusterer = OptimizedAgglomerativeClustering()
        labels = clusterer.fit_predict(embeddings)

        if plot:
            self._plot_diarization(signal, spans, labels)

        return np.array(spans) / self.sr, labels
예제 #6
0
 def _get_bindings(self, vulnerability):
     """Translate a vulnerability record into the bindings dictionary.

     Values are read with ``vulnerability.get``. A missing or falsy
     ``cvssScore`` becomes 10.0, several optional fields collapse to an
     empty string when falsy, and ``pvtVuln`` collapses to ``False``.
     ``modified_date`` is filled from ``get_timestamp()``.
     """
     lookup = vulnerability.get
     bindings = {}

     bindings['snyk_vuln_id'] = lookup('id')
     bindings['description'] = lookup('description')
     bindings['cvss_score'] = lookup('cvssScore') or 10.0  # assume the worst
     bindings['ecosystem'] = lookup('ecosystem')
     bindings['modified_date'] = get_timestamp()
     bindings['severity'] = lookup('severity')

     # These fields share their name with the source key and default to "".
     for name in ('title', 'url', 'cvssV3', 'exploit', 'fixable'):
         bindings[name] = lookup(name) or ""

     bindings['malicious'] = lookup('malicious')
     bindings['patch_exists'] = lookup('patchExists') or ""
     bindings['snyk_pvt_vul'] = lookup('pvtVuln') or False
     return bindings
예제 #7
0
def train(train_files, val_files, test_files, model_name, exp):
    """Train one model for one experiment, score it on the test set and save it.

    Args:
        train_files: Iterable of corpus files for the training set.
        val_files: Iterable of corpus files for the validation set. An empty
            iterable or ``None`` means no validation set (``None`` is passed
            to ``model.train``).
        test_files: Iterable of corpus files for the test set. An empty
            iterable or ``None`` means no test set and no final scoring.
        model_name: Key into ``VALID_MODELS`` (case-insensitive); asked for
            interactively when falsy.
        exp: Experiment number; asked for interactively when falsy.

    Returns:
        None. The trained model is saved under
        ``base_configs.model.savepath + get_timestamp() + '/'``.
    """
    # Complete args
    model_name = (model_name if model_name else input('model_name=?')).lower()
    exp_num = 'exp_' + (str(exp) if exp else input('exp_?'))

    logger.info('---------- Training {} on {} start ----------'.format(
        model_name, exp_num))

    # Load data. The file lists are materialised once so that a one-shot
    # iterable is not exhausted by the log call before the corpus is loaded.
    train_paths = list(train_files)
    logger.info('Loading training set: {}'.format(train_paths))
    train_df = load_corpus(train_paths)

    # Truthiness (rather than `!= ()`) also treats None and [] as "no files".
    val_paths = list(val_files) if val_files is not None else []
    if val_paths:
        logger.info('Loading validation set: {}'.format(val_paths))
        val_df = load_corpus(val_paths)
    else:
        val_df = None

    test_paths = list(test_files) if test_files is not None else []
    if test_paths:
        logger.info('Loading test set: {}'.format(test_paths))
        test_df = load_corpus(test_paths)
    else:
        test_df = None

    # Load configs
    cfg_path = get_envar('CONFIG_PATH') + '/' + get_envar('BASE_CONFIG')
    logger.info('Loading base_configs from {}'.format(cfg_path))
    base_configs = read_config(cfg_path, obj_view=True)

    logger.info('Loading exp_configs on {} from {}'.format(
        exp_num, base_configs.exp_configs.path))
    exp_configs = read_config(base_configs.exp_configs.path,
                              obj_view=False)[exp_num]

    description = exp_configs['description']
    hyparams = exp_configs['hyperparams']
    logger.info('Experiment description: {}'.format(description.strip()))
    logger.info('Hyperparams: {}'.format(hyparams))

    wdir = base_configs.model.savepath + get_timestamp() + '/'

    # Build Model (model_name is already lower-cased above)
    lm = LexiconManager()
    dm = AbsaDataManager(lexicon_manager=lm)
    model_cls = VALID_MODELS[model_name]
    model = model_cls(datamanager=dm, parameters=hyparams)

    # Train
    model.train(train_df, val_df, test_df)

    # Predict and score on test
    if test_df is not None:
        _, _, loss_, acc3_ = model.score(test_df)
        logger.info(
            'Final score on test set: '
            'test_loss={loss:.4f} '
            'test_acc3={acc:.2%}'.format(loss=loss_, acc=acc3_))

    # Save model
    model.save(wdir)

    # Close tf.Session, not really necessary but... anyway
    model.close_session()

    logger.info('---------- Training {} on {} end ----------'.format(
        model_name, exp_num))
예제 #8
0
def test_get_timestamp():
    """Test utils.get_timestamp().

    The expected UTC date is sampled both before and after the call so the
    test cannot fail spuriously when it runs across UTC midnight.
    """
    utc = datetime.timezone.utc
    before = datetime.datetime.now(utc).strftime('%Y%m%d')
    timestamp = get_timestamp()
    after = datetime.datetime.now(utc).strftime('%Y%m%d')
    assert timestamp in (before, after)
예제 #9
0
    First, we loop through all the Properties within the main account (Bestseller (Universal)) to get information about all the Views under each Property.
    Second, we filter for the View profiles we want -- we only need brand-country level Views to pull data from.
    Third, we add more columns with the information we need -- sitebrand, sitecountry, table_updated_time, etc.
"""

import pandas as pd
from datetime import datetime
from src.configure_logging import configure_logging
from src.ga_connector import GoogleAnalytics
from src.s3_toolkit import s3Toolkit
from src.utils import get_timestamp

# To avoid printing out logs from GA's module we need to set up our own logger,
# named after this module.
logger = configure_logging(logger_name=__name__)
# NOTE(review): presumably the id of the main Google Analytics account -- confirm.
ACCOUNT = 66188758
# Evaluated once at import time, so the value is fixed for the module's lifetime.
TIMESTAMP = get_timestamp()


def get_bucket_name(env):
    """
    Gets Bucket Name according to the chosen environment

    Parameters:
    ----------
        env : string
            dev or prod
    Returns:
    ----------
        bucket_name: string
    """
예제 #10
0
    X_test = test.drop(drop_cols + [target_col], axis=1)

    # fill inf/nan
    X_train.replace(np.inf, np.nan, inplace=True)
    X_test.replace(np.inf, np.nan, inplace=True)
    X_train.fillna(X_train.mean(), inplace=True)
    X_test.fillna(X_train.mean(), inplace=True)

    # rankgauss transform
    # https://www.kaggle.com/c/porto-seguro-safe-driver-prediction/discussion/44629
    prep = QuantileTransformer(output_distribution="normal")
    X_cont_train = prep.fit_transform(X_train)
    X_cont_test = prep.transform(X_test)

    # train and predict
    timestamp = get_timestamp()
    oof_preds, test_preds, cv_scores = run(
        X_seq_train,
        X_cont_train,
        y_train,
        X_seq_test,
        X_cont_test,
        timestamp,
        random_state=0,
    )

    # save results
    print(cv_scores)
    output_dir = Path(f"./output/{timestamp}")
    output_dir.mkdir(parents=True)
    pd.DataFrame(oof_preds).to_csv(output_dir / f"{timestamp}_oof.csv",
예제 #11
0
# --compile: resolve the static template, decide where the compiled template
# is written, then run the compilation.
static_template_path = None
compile_output_path = None
if args.compile is not None:
    static_template_path = Path(args.compile).resolve()
    if not static_template_path.is_file():
        exit_on_arg_error(
            "ERROR - Invalid path for static template file"
            " (--compile)."
        )
    if args.compileoutput is None:
        # No explicit destination: write next to the template, using the
        # value of get_timestamp() in the file name.
        compile_file_name = "tmplt" + get_timestamp() + ".py"
        compile_output_path = static_template_path.parent / compile_file_name
    else:
        compile_output_path = Path(args.compileoutput).resolve()
        if not compile_output_path.parent.exists():
            exit_on_arg_error(
                "ERROR - Invalid output path for compilation"
                " - The output path directory does not exist."
                " (--compileoutput)."
            )

    compile_template(static_template_path, compile_output_path)

# --execute: locate the template to run.
if args.execute:
    template_path = None
    template_path_parent_directory = None
    template_module = None
    if compile_output_path is None:
        # Nothing was compiled in this run, so an explicit path is required.
        # Compare by value: identity (`is`) against a string literal is
        # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
        if args.execute == "$compiled$":
            exit_on_arg_error("ERROR - A path must be passed to the execute"
                              " argument when execution does not occur along "
                              "with compilation (--execute <path>).")
        template_path = Path(args.execute).resolve()
        if not template_path.is_file():