コード例 #1
0
ファイル: mallet_entries.py プロジェクト: doc-vu/continuum
def retrain(data):
    """Retrain the MALLET topic model on ``data`` and commit the new run.

    On-disk layout::

        /var/opt/mallet/
        |- tool/
        |- data/
            |- dictionary
            |- 11397283704/
                |- text
                |- data
                |- model
            |- 11397283928/

    A new child directory is created under the data directory.  The input
    is written there as ``text``, imported into MALLET's format as ``data``
    (reusing a copy of the shared ``dictionary`` via ``--use-pipe-from``),
    and a 10-topic model is trained into ``model``.  If a previous run
    directory exists, its model is handed to MALLET with ``--input-model``.

    Args:
        data: Iterable of documents.  Each document is an iterable of
            values accepted by ``int()``; every value is turned into one
            character with ``chr``.  One document is written per line.
    """
    MALLET_BIN = '/var/opt/mallet/tool/bin/mallet'
    MALLET_DATA_DIR = '/var/opt/mallet/data'

    import sys
    import time
    from os.path import abspath, dirname, join

    # ``utils`` is loaded from the ``modules`` directory located one level
    # above this file's directory.  The path entry is removed again even if
    # the import fails, so repeated failures do not pile up on sys.path.
    sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules'))
    try:
        import utils
    finally:
        sys.path.pop(0)

    # Resolve the previous run before the new directory is created.
    latest_dir = utils.get_latest_dir(MALLET_DATA_DIR)
    new_dir = utils.create_child_dir(MALLET_DATA_DIR)
    text_path = '{}/text'.format(new_dir)
    data_path = '{}/data'.format(new_dir)
    model_path = '{}/model'.format(new_dir)

    with open(text_path, 'w') as f:
        for chars in data:
            f.write(''.join(chr(int(d)) for d in chars) + '\n')

    # Import against a copy of the dictionary stored inside the run directory.
    utils.execute_cmd("cp {} {}".format(
        '{}/dictionary'.format(MALLET_DATA_DIR), new_dir))
    dict_path = '{}/dictionary'.format(new_dir)

    # The backslashes are escaped explicitly: "\p" is not a valid Python
    # escape sequence.  After formatting, the shell still receives
    # '[\p{L}\p{P}]+' exactly as before.
    utils.execute_cmd((
        "{bin} import-file --input {input} --output {output} --token-regex '[\\p{{L}}\\p{{P}}]+' "
        "--keep-sequence --remove-stopwords --use-pipe-from {dictionary} "
    ).format(bin=MALLET_BIN,
             input=text_path,
             output=data_path,
             dictionary=dict_path))

    # Continue from the previous model when one exists; otherwise train
    # from scratch (no extra flag).
    if latest_dir is None:
        base_model = ''
    else:
        base_model = '--input-model {}/model'.format(latest_dir)

    utils.execute_cmd((
        "{bin} train-topics --input {input} --num-topics 10 --output-model {model} "
        "--num-iterations 1000 --show-topics-interval 1000000 {base_model}"
    ).format(bin=MALLET_BIN,
             input=data_path,
             model=model_path,
             base_model=base_model))

    # NOTE(review): the fixed 10 s pause before committing is kept from the
    # original; its purpose is not visible in this file.
    time.sleep(10)
    utils.commit_dir(new_dir)
コード例 #2
0
ファイル: xgboost_entries.py プロジェクト: doc-vu/continuum
def retrain(data):
    """Train an XGBoost model on ``data`` and commit it as a new run.

    On-disk layout::

        /var/opt/xgboost
        |- 00000000000/
            |- model
            |- lock
        |- 11397283704/
        ...

    If a previous run directory exists, its ``model`` file is loaded and
    training continues from it; otherwise a new model is trained.

    Args:
        data: Sequence of rows.  Element 0 of each row is used as the label
            and the remaining elements as the feature vector.
    """
    import xgboost as xgb
    import numpy as np
    from os.path import abspath, dirname, join
    import sys
    # Load ``utils`` from the ``modules`` directory one level above this
    # file's directory, then drop the temporary path entry again.
    sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules'))
    import utils
    sys.path.pop(0)
    import time

    XGBOOST_DIR = '/var/opt/xgboost'
    previous_dir = utils.get_latest_dir(XGBOOST_DIR)

    # Warm-start from the previous model when there is one.
    booster = None
    if previous_dir is not None:
        booster = xgb.Booster()
        booster.load_model('{}/model'.format(previous_dir))

    # Split rows into features (everything after column 0) and labels
    # (column 0); features are extracted first, as in the original.
    features = np.array([row[1:] for row in data])
    labels = np.array([row[0] for row in data])
    train_set = xgb.DMatrix(features, label=labels)

    train_params = dict(
        max_depth=5,
        eta=0.1,
        objective='binary:logistic',
        silent=0,
    )
    trained = xgb.train(train_params, train_set,
                        num_boost_round=30, evals=[], xgb_model=booster)

    # Persist the result into a fresh run directory.
    run_dir = utils.create_child_dir(XGBOOST_DIR)
    trained.save_model('{}/model'.format(run_dir))

    # NOTE(review): fixed 10 s pause before committing, as in the original;
    # the reason is not visible in this file.
    time.sleep(10)
    utils.commit_dir(run_dir)
コード例 #3
0
def retrain(data):
    """Run the PPR sbt job on a new batch of edges and commit the result.

    On-disk layout::

        /var/opt/ppr/
        |- data/
            |- graph
            |- 11397283704/
            |- 11397283928/
        |- src/

    The edges are written to ``edges`` inside a new run directory.  If a
    previous run directory exists the sbt project is invoked with
    ``run retrain`` against it; otherwise ``run train`` is invoked against
    the ``graph`` file in the data directory.  Output of the sbt command is
    redirected to ``log`` in the run directory.

    Args:
        data: Iterable of ``(u, v)`` pairs; both members are converted with
            ``int()`` and written as one ``"u v"`` line per pair.
    """
    PPR_DATA_DIR = '/var/opt/ppr/data'
    PPR_CODE_DIR = '/var/opt/ppr/src'

    import sys
    import time
    from os.path import abspath, dirname, join

    # ``utils`` is loaded from the ``modules`` directory located one level
    # above this file's directory.  The path entry is removed again even if
    # the import fails.
    sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules'))
    try:
        import utils
    finally:
        sys.path.pop(0)

    # Resolve the previous run before the new directory is created.
    latest_dir = utils.get_latest_dir(PPR_DATA_DIR)
    new_dir = utils.create_child_dir(PPR_DATA_DIR)

    with open('{}/edges'.format(new_dir), 'w') as f:
        for (u, v) in data:
            f.write('{} {}\n'.format(int(u), int(v)))

    # Only the arguments passed to ``sbt run`` differ between the two modes.
    if latest_dir is not None:
        sbt_args = 'retrain {prev_dir} {res_dir} edges {res_dir}'.format(
            prev_dir=latest_dir, res_dir=new_dir)
    else:
        sbt_args = ('train {graph_dir} graph 0.0015 0.05 89805 '
                    '{res_dir} edges {res_dir}').format(
            graph_dir=PPR_DATA_DIR, res_dir=new_dir)

    # NOTE(review): ``&>`` is a bash-style redirection; this assumes
    # utils.execute_cmd runs the command through bash -- confirm.
    cmd = '''
            cd {cwd} &&
            export SBT_OPTS="-Xmx8G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=8G" &&
            sbt "run {sbt_args}" &>{res_dir}/log
            '''.format(cwd=PPR_CODE_DIR, sbt_args=sbt_args, res_dir=new_dir)

    utils.execute_cmd(cmd)

    # NOTE(review): the fixed 10 s pause before committing is kept from the
    # original; its purpose is not visible in this file.
    time.sleep(10)
    utils.commit_dir(new_dir)
コード例 #4
0
ファイル: mf_entries.py プロジェクト: doc-vu/continuum
def retrain(data):
    """Run a FastMF ``retrain`` step on a new batch of (user, item) pairs.

    On-disk layout::

        /var/opt/mf/
        |- data/
            |- 0000000000/
                |- model
                |- lock
            |- 1523335358/
            ...
        |- src/

    The pairs are written to ``data`` inside a new run directory, then the
    ``mill FastMF.run retrain`` command is executed from the source
    directory with its output redirected to ``log`` in the run directory.
    The run directory is intentionally not committed (see comment below).

    Args:
        data: Iterable of ``(user, item)`` pairs; both members are converted
            with ``int()``.
    """
    MF_DATA_DIR = '/var/opt/mf/data'
    MF_CODE_DIR = '/var/opt/mf/src'

    import subprocess
    import time
    from os.path import abspath, dirname, join
    import sys
    # Load ``utils`` from the ``modules`` directory one level above this
    # file's directory, then drop the temporary path entry again.
    sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules'))
    import utils
    sys.path.pop(0)

    # NOTE(review): if get_latest_dir can return None here, the command
    # below would reference "None/model" -- confirm a base model directory
    # always exists.
    base_dir = utils.get_latest_dir(MF_DATA_DIR)
    run_dir = utils.create_child_dir(MF_DATA_DIR)

    # One tab-separated line per pair: user, item, then the constant
    # fields "1.0" and "00000000".
    ratings_path = '{}/data'.format(run_dir)
    with open(ratings_path, 'w') as out:
        out.writelines(
            '{}\t{}\t1.0\t00000000\n'.format(int(user), int(item))
            for (user, item) in data)

    # A new model is not created on every call; the initial base model is
    # always reused because the model takes too much space.  That is why
    # the commit step is skipped here and why the FastMF source code was
    # modified to skip saving.
    cmd_template = '''
        cd {cwd} &&
        mill FastMF.run retrain {prev_dir}/model {res_dir}/data {res_dir}/model &>{res_dir}/log
        '''
    utils.execute_cmd(
        cmd_template.format(cwd=MF_CODE_DIR, prev_dir=base_dir, res_dir=run_dir))

    time.sleep(10)
コード例 #5
0
                    '--nb_tests',
                    default=10,
                    type=int,
                    dest="nb_tests",
                    help="Number of evaluation to perform.")
# -f/--folder: directory holding the saved models; None means "pick one
# automatically" (resolved right after parsing).
parser.add_argument('-f',
                    '--folder',
                    default=None,
                    type=str,
                    dest="folder",
                    help="Folder where the models are saved")
args = parser.parse_args()

# No folder given: fall back to a directory under 'runsFlatPlate/' chosen by
# get_latest_dir (presumably the most recent run -- helper not visible here).
if args.folder is None:
    args.folder = os.path.join('runsFlatPlate/',
                               get_latest_dir('runsFlatPlate/'))

# Load the configuration stored alongside the models of that run.
with open(os.path.join(args.folder, 'configuration.yaml'), 'r') as file:
    config = yaml.safe_load(file)

# Always use the CPU device.
device = torch.device('cpu')

# Make sure the '<folder>/test/' output directory exists.
if not os.path.exists(args.folder + '/test/'):
    os.mkdir(args.folder + '/test/')

## --------------------------------- Environment settings ------------------------------

# final position of the problem (read from the "XB" / "YB" config entries)
xB = config["XB"]
yB = config["YB"]
コード例 #6
0
import sys
sys.path.extend(["../commons/"])

import argparse
import yaml
import gym
import torch

from model import Model
from utils import get_latest_dir

# Load the configuration from 'config.yaml' in the current working directory.
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Default model folder: a directory under 'runs/' chosen by get_latest_dir
# (presumably the most recent run -- helper not visible here).  Note this is
# evaluated eagerly, before the command line is parsed.
default_dir = 'runs/' + get_latest_dir('runs/')
# The parser description includes the "GAME" entry from the configuration.
parser = argparse.ArgumentParser(description='Test DDPG on ' + config["GAME"])
# --render: boolean flag, False unless given.
parser.add_argument('--render',
                    action='store_true',
                    dest="render",
                    help='Display the tests')
# -n/--nb_tests: number of evaluations to run (default 10).
parser.add_argument('-n',
                    '--nb_tests',
                    default=10,
                    type=int,
                    dest="nb_tests",
                    help="Number of evaluation to perform.")
parser.add_argument('-f',
                    '--folder',
                    default=default_dir,
                    type=str,
                    dest="folder",