def retrain(data): """ /var/opt/mallet/ |- tool/ |- data/ |- dictionary |- 11397283704/ |- text |- data |- model |- 11397283928/ """ MALLET_BIN = '/var/opt/mallet/tool/bin/mallet' MALLET_DATA_DIR = '/var/opt/mallet/data' import subprocess from os.path import abspath, dirname, join import sys sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules')) import utils sys.path.pop(0) latest_dir = utils.get_latest_dir(MALLET_DATA_DIR) new_dir = utils.create_child_dir(MALLET_DATA_DIR) text_path = '{}/text'.format(new_dir) data_path = '{}/data'.format(new_dir) model_path = '{}/model'.format(new_dir) dict_path = '{}/dictionary'.format(MALLET_DATA_DIR) with open('{}/text'.format(new_dir), 'w') as f: for chars in data: f.write(''.join(map(lambda d: chr(int(d)), chars)) + '\n') utils.execute_cmd("cp {} {}".format(dict_path, new_dir)) dict_path = '{}/dictionary'.format(new_dir) utils.execute_cmd(( "{bin} import-file --input {input} --output {output} --token-regex '[\p{{L}}\p{{P}}]+' " "--keep-sequence --remove-stopwords --use-pipe-from {dictionary} " ).format(bin=MALLET_BIN, input=text_path, output=data_path, dictionary=dict_path)) utils.execute_cmd(( "{bin} train-topics --input {input} --num-topics 10 --output-model {model} " "--num-iterations 1000 --show-topics-interval 1000000 {base_model}" ).format(bin=MALLET_BIN, input=data_path, model=model_path, base_model=('' if latest_dir is None else '--input-model {}/model'.format(latest_dir)))) import time time.sleep(10) utils.commit_dir(new_dir)
def retrain(data): """ /var/opt/xgboost |- 00000000000/ |- model |- lock |- 11397283704/ ... """ import xgboost as xgb import numpy as np from os.path import abspath, dirname, join import sys sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules')) import utils sys.path.pop(0) XGBOOST_DIR = '/var/opt/xgboost' latest_dir = utils.get_latest_dir(XGBOOST_DIR) if latest_dir is not None: base_model = xgb.Booster() base_model.load_model('{}/model'.format(latest_dir)) else: base_model = None # retrain model X = np.array([d[1:] for d in data]) y = np.array([d[0] for d in data]) dataset = xgb.DMatrix(X, label=y) params = { 'max_depth': 5, 'eta': 0.1, 'objective': 'binary:logistic', 'silent': 0 } model = xgb.train(params, dataset, 30, [], xgb_model=base_model) # save model new_dir = utils.create_child_dir(XGBOOST_DIR) model.save_model('{}/model'.format(new_dir)) import time time.sleep(10) utils.commit_dir(new_dir)
def retrain(data): """ /var/opt/ppr/ |- data/ |- graph |- 11397283704/ |- 11397283928/ |- src/ """ PPR_DATA_DIR = '/var/opt/ppr/data' PPR_CODE_DIR = '/var/opt/ppr/src' import subprocess from os.path import abspath, dirname, join import sys sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules')) import utils sys.path.pop(0) latest_dir = utils.get_latest_dir(PPR_DATA_DIR) new_dir = utils.create_child_dir(PPR_DATA_DIR) with open('{}/edges'.format(new_dir), 'w') as f: for (u, v) in data: f.write('{} {}\n'.format(int(u), int(v))) if latest_dir is not None: cmd = ''' cd {cwd} && export SBT_OPTS="-Xmx8G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=8G" && sbt "run retrain {prev_dir} {res_dir} edges {res_dir}" &>{res_dir}/log '''.format( cwd=PPR_CODE_DIR, prev_dir=latest_dir, res_dir=new_dir) else: cmd = ''' cd {cwd} && export SBT_OPTS="-Xmx8G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=8G" && sbt "run train {graph_dir} graph 0.0015 0.05 89805 {res_dir} edges {res_dir}" &>{res_dir}/log '''.format( cwd=PPR_CODE_DIR, graph_dir=PPR_DATA_DIR, res_dir=new_dir) utils.execute_cmd(cmd) import time time.sleep(10) utils.commit_dir(new_dir)
def retrain(data): """ /var/opt/mf/ |- data/ |- 0000000000/ |- model |- lock |- 1523335358/ ... |- src/ """ MF_DATA_DIR = '/var/opt/mf/data' MF_CODE_DIR = '/var/opt/mf/src' import subprocess import time from os.path import abspath, dirname, join import sys sys.path.insert(0, join(dirname(dirname(abspath(__file__))), 'modules')) import utils sys.path.pop(0) latest_dir = utils.get_latest_dir(MF_DATA_DIR) new_dir = utils.create_child_dir(MF_DATA_DIR) with open('{}/data'.format(new_dir), 'w') as f: for (user, item) in data: f.write('{}\t{}\t1.0\t00000000\n'.format(int(user), int(item))) # We will not create a new model every time and always use the initial base model # because the model takes too much space. This is the reason why we skip committing # and modify FastMF source code to skip saving. cmd = ''' cd {cwd} && mill FastMF.run retrain {prev_dir}/model {res_dir}/data {res_dir}/model &>{res_dir}/log '''.format(cwd=MF_CODE_DIR, prev_dir=latest_dir, res_dir=new_dir) utils.execute_cmd(cmd) time.sleep(10)
parser.add_argument('-n', '--nb_tests', default=10, type=int, dest="nb_tests",
                    help="Number of evaluations to perform.")
parser.add_argument('-f', '--folder', default=None, type=str, dest="folder",
                    help="Folder where the models are saved")
args = parser.parse_args()

# Default to the most recent run directory.
if args.folder is None:
    args.folder = os.path.join('runsFlatPlate/', get_latest_dir('runsFlatPlate/'))

with open(os.path.join(args.folder, 'configuration.yaml'), 'r') as file:
    config = yaml.safe_load(file)

device = torch.device('cpu')

if not os.path.exists(args.folder + '/test/'):
    os.mkdir(args.folder + '/test/')

## --------------------------------- Environment settings ------------------------------

# Final position of the problem.
xB = config["XB"]
yB = config["YB"]
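# The snippet above reads the target position from each run's configuration.yaml.
# A minimal, hypothetical example of the keys it expects (the values are made up):
#
#   XB: 0.8   # final x position of the problem
#   YB: 0.2   # final y position of the problem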
import sys
sys.path.extend(["../commons/"])

import argparse

import yaml
import gym
import torch

from model import Model
from utils import get_latest_dir

with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Default to the most recent run directory.
default_dir = 'runs/' + get_latest_dir('runs/')

parser = argparse.ArgumentParser(description='Test DDPG on ' + config["GAME"])
parser.add_argument('--render', action='store_true', dest="render",
                    help='Display the tests')
parser.add_argument('-n', '--nb_tests', default=10, type=int, dest="nb_tests",
                    help="Number of evaluations to perform.")
parser.add_argument('-f', '--folder', default=default_dir, type=str, dest="folder",
                    help="Folder where the models are saved")
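# Example invocations of this test script (the filename is an assumption, since
# the snippet does not name the file):
#
#   python test.py                    # evaluate the latest run in runs/
#   python test.py --render -n 25     # render the episodes and run 25 evaluations
#   python test.py -f runs/my_run/    # evaluate a specific run directory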