def __init__(self, layers, activation, activation_last=None, batch_norm=False,
             initialize=True, *args, **kwargs):
    super(MLPBlock, self).__init__(*args, **kwargs)
    from utils import get_module
    _layers = []
    for i, node in enumerate(layers):
        if i == len(layers) - 1:
            break
        else:
            _layers.append(nn.Linear(layers[i], layers[i + 1]))
            if batch_norm:
                _layers.append(nn.BatchNorm1d(layers[i + 1]))
            if i == len(layers) - 2:
                if activation_last is None:
                    _layers.append(get_module([nn], 'Identity')())
                else:
                    _layers.append(get_module([nn], activation_last)())
            else:
                _layers.append(get_module([nn], activation)())
    self._layers = nn.Sequential(*_layers)
    if initialize:
        self.apply(self._init_weights)
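The snippets in this section repeatedly resolve classes by name through a `get_module` helper (e.g. `get_module([nn], 'ReLU')()`), but the helper itself is not shown. The sketch below is an assumption about how such a lookup could work, not the project's actual `utils.get_module`:

# Hedged sketch: resolve an attribute by name across a list of modules.
# This is an illustrative stand-in for utils.get_module, not the original code.
from torch import nn


def get_module(modules, name):
    """Return the first attribute called `name` found in `modules`."""
    for module in modules:
        if hasattr(module, name):
            return getattr(module, name)
    raise AttributeError(f"'{name}' not found in {[m.__name__ for m in modules]}")


# Example: turn an activation name from a config into an instance.
activation = get_module([nn], 'ReLU')()   # equivalent to nn.ReLU()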
def main():
    cli_args = read_args()
    param_config = Config(cli_args.config)
    N_TRIES = 25
    np.random.seed(cli_args.seed)
    fn = get_module(cli_args.fn)()
    optimizer = get_module(cli_args.optimizer)(N_TRIES, param_config, fn,
                                               plot=cli_args.plot)
    best_input, best_output = optimizer.optimize()
    optimal_in, optimal_out = fn.optimal_input()
    print("max value: {} with input: {}, optimal output: {opt_out} "
          "with input: {opt_in} diff: {diff}".format(
              best_output, best_input, opt_out=optimal_out,
              opt_in=optimal_in, diff=optimal_out - best_output))
    write_output(cli_args, param_config, optimizer, fn)
def initialize(self, model, dataloaders, input_key: str, target_key: str,
               criterion: str, device='cpu', optimizer: str = 'Adam',
               do_manual_decay: bool = False, hp_epochs: int = 10,
               hp_lr: float = 1e-3, lr: float = 1e-3, patience: int = 10,
               hps: dict = {}, **kwargs):
    self._model = model
    self._dataloaders = dataloaders
    self._input_key = input_key
    self._target_key = target_key
    if isinstance(criterion, str):
        from models import MyLoss
        self._criterion = get_module([nn, MyLoss], criterion)()
    else:
        self._criterion = criterion
    self._device = device
    from torch import optim
    self._optimizer = get_module([optim], optimizer)(model.parameters())
    self._do_manual_decay = do_manual_decay
    self._hp_epochs = hp_epochs
    self._hp_lr = hp_lr
    self._lr = lr
    self._patience = patience
    self._hps = hps
def get_module(self, base_url=None, username=None, password=None):
    if base_url is None:
        base_url = self.url
    if username is None:
        username = self.username
    if password is None:
        password = self.password
    utils.get_module(base_url, username, password, str(platform))
def get_module(self, base_url=None, username=None, password=None):
    if base_url is None:
        base_url = self.url
    if username is None:
        username = self.username
    if password is None:
        password = self.password
    user_data_dir = App.get_running_app().user_data_dir
    tar = join(user_data_dir, "swm.tgz")
    ini = join(user_data_dir, "swm.ini")
    swm_json = join(user_data_dir, "swm.json")
    swm_dir = join(user_data_dir, "swm")
    utils.get_module(base_url, username, password, str(platform), tar, ini,
                     swm_json, swm_dir, user_data_dir)
def run(interp, line):
    line = line.strip()
    try:
        [(name, module, args)] = re.findall(
            '^([a-z][a-zA-Z_0-9]*) = ([a-z][a-zA-Z_0-9]*)\((.*)\)', line)
    except ValueError:
        print 'Error: failed to parse load command.'
        print
        return
    if module not in available:
        print 'did not recognize loader %r' % module
        return
    m = get_module('load', module)(interp, name)
    try:
        exec 'm.main(%s)' % args
    except SyntaxError as e:
        print 'Syntax error: %s' % e
        return
    return interp.run_agenda()
def __init__(self, dir_path):
    self.dir_path = dir_path
    self.hash = None
    self.file_paths = []
    self.ea_list = []
    self.tags = set()
    if self.event_accumulator is None:
        self.event_accumulator = get_module(
            'tensorboard.backend.event_processing.event_accumulator')
    if not os.path.isdir(dir_path):
        return
    for file in os.listdir(dir_path):
        file_path = os.path.join(self.dir_path, file)
        if os.path.isfile(file_path) and 'tfevents' in file:
            self.file_paths.append(file_path)
    if len(self.file_paths) > 0:
        for file_path in self.file_paths:
            ea_inst = self.event_accumulator.EventAccumulator(
                file_path,
                size_guidance={self.event_accumulator.SCALARS: 0})
            ea_inst.Reload()
            ea_tags = ea_inst.Tags().get('scalars') or []
            self.ea_list.append({'eq': ea_inst, 'tags': ea_tags})
            for t in ea_tags:
                self.tags.add(t)
def train_algorithm(request):
    module_id = request.GET.get('module_id')
    scene_id = request.GET.get('scene_id')
    limit = request.GET.get('limit')
    if scene_id and module_id:
        tmp = utils.get_scene_record(module_id, scene_id)
        for i in tmp:
            i['data_length'] = range(len(i['data'][i['data'].keys()[0]]))
            i['resources'] = []
            i['apis'] = []
            i['api_info'] = []
            api_dict = {}
            for k in i['data'].keys():
                if k != 'total' and k.find('#api#') != 0:
                    i['resources'].append(k)
                if k != 'total' and k.find('#api#') == 0:
                    api_dict[k[5:]] = i['data'][k]
                    # this_api_id = utils.get_api_by_name(k[5:])
                    i['api_info'].append(k)  # TODO
            for j in i['data_length']:
                current_api_dict = {}
                for k, v in api_dict.iteritems():
                    current_api_dict[k] = v[j]
                i['apis'].append(current_api_dict)
        if limit and int(limit) > 0:
            ret = {'scene_records': tmp[:int(limit)]}
        else:
            ret = {'scene_records': tmp}
        ret['module_id'] = module_id
        ret['scene_id'] = scene_id
        scene_api = utils.get_scene_api(module_id, scene_id)
        for s in scene_api:
            s['api_info'] = utils.get_api(s.get('api_id'))
            # get threshold
            if s['api_info']:
                s['api_info']['threholds'] = utils.get_api_resource(s.get('api_id'))
                for th in s['api_info']['threholds'].get('resource_list'):
                    th['name'] = utils.get_resource(th.get('resource_id')).get('name')
        ret['scene_info'] = utils.get_scene(scene_id)
        ret['module_info'] = utils.get_module(module_id)
        ret['scene_api'] = scene_api
        ret['all_resource'] = []
        all_resource_ids = []
        # get all resources needed
        for s in scene_api:
            for id in s.get('api_info').get('threholds').get('resource_id'):
                if not id in all_resource_ids:
                    all_resource_ids.append(id)
                    ret['all_resource'].append(utils.get_resource(id))
        ret["public"] = utils.get_public(request)
        return render(request, 'assess/train_algorithm.html', {'data': ret})
    else:
        return render(request, 'error.html')
def __init__(self, layers, activation=None, batch_norm=False, initialize=True,
             *args, **kwargs):
    super(LSTMBlock, self).__init__(*args, **kwargs)
    from collections import OrderedDict
    from utils import get_module
    _layers = OrderedDict()
    for i, node in enumerate(layers):
        if i == len(layers) - 1:
            break
        else:
            _layers[f'LSTM{i}'] = nn.LSTM(layers[i], layers[i + 1])
    if batch_norm:
        _layers['batchnorm1d'] = nn.BatchNorm1d(layers[-1])
    if activation is not None:
        _layers[activation] = get_module([nn], activation)()
    self._layers = nn.Sequential(_layers)
    if initialize:
        self.apply(self._init_weights)
def iterate_values_det(position_transfers, rewards, discount_factor,
                       convergence_factor):
    length = len(position_transfers)
    width = len(position_transfers[0])
    values = [0 for x in range(0, length)]
    policy = [0 for x in range(0, length)]
    delta = None
    while delta is None or delta >= convergence_factor:
        delta = 0
        for position in range(0, length):
            vs_old = values[position]
            values[position] = None
            for place in range(0, width):
                next_position = position_transfers[position][place] - 1
                next_reward = rewards[position][place]
                value = next_reward + (discount_factor * (values[next_position] or 0))
                if values[position] is None or value > values[position]:
                    values[position] = value
                    policy[position] = place + 1
            delta = max(delta, utils.get_module(values[position] - vs_old))
    return policy
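In this snippet and in `evaluate_policy_det` below, `utils.get_module` is applied to a scalar difference inside a convergence check, so here it appears to mean the modulus (absolute value) of a number rather than a module lookup. A minimal stand-in under that assumption:

# Hedged guess at the scalar helper used for the convergence delta;
# the real utils.get_module is not shown in these snippets.
def get_module(x):
    """Absolute value (modulus) of a number."""
    return x if x >= 0 else -x


assert get_module(-0.25) == 0.25 and get_module(0.25) == 0.25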
def __init__(self, layers_conv2d=None, initialize=True, *args, **kwargs):
    super(Conv2DBlock, self).__init__(*args, **kwargs)
    from copy import copy
    from utils import get_module
    _layers = []
    conv2d_args = {"stride": 1, "padding": 0, "activation": 'ReLU'}
    maxpooling2d_args = {"kernel_size": 2, "stride": 2}
    for layer, args in layers_conv2d:
        if layer == 'conv2d':
            layer_args = copy(conv2d_args)
            layer_args.update(args)
            activation = layer_args.pop('activation')
            _layers.append(nn.Conv2d(**layer_args))
            _layers.append(get_module([nn], activation)())
        elif layer == 'maxpooling2d':
            layer_args = copy(maxpooling2d_args)
            layer_args.update(args)
            _layers.append(nn.MaxPool2d(**layer_args))
        else:
            raise ValueError(f"{layer} is not implemented")
    self._layers = nn.Sequential(*_layers)
    if initialize:
        self.apply(self._init_weights)
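A hedged illustration of the `layers_conv2d` specification the constructor above appears to expect: a sequence of `(layer_name, kwargs)` pairs, where missing keys fall back to the defaults defined in the constructor. `Conv2DBlock` itself is defined elsewhere, so its instantiation is only shown commented out:

# Hypothetical spec, for illustration only; keys mirror nn.Conv2d / nn.MaxPool2d arguments.
layers_conv2d = [
    ('conv2d', {'in_channels': 1, 'out_channels': 16, 'kernel_size': 3}),
    ('maxpooling2d', {}),  # falls back to kernel_size=2, stride=2
    ('conv2d', {'in_channels': 16, 'out_channels': 32, 'kernel_size': 3,
                'activation': 'Tanh'}),  # overrides the default 'ReLU'
]
# block = Conv2DBlock(layers_conv2d=layers_conv2d)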
def do_help(self, line):
    mod = line.split()
    if len(mod) <= 1:
        return super(REPL, self).do_help(line)
    else:
        if len(mod) == 2:
            [cmd, sub] = mod
            if cmd in ('load', 'post'):
                m = get_module(cmd, sub)
                if m:
                    print m.__doc__
                else:
                    print 'No help available for "%s %s"' % (cmd, sub)
        return
def evaluate_policy_det(position_transfers, rewards, policy, discount_factor,
                        convergence_factor):
    length = len(position_transfers)
    values = [0 for x in range(0, length)]
    convergence_delta = None
    while convergence_delta is None or convergence_delta >= convergence_factor:
        values_new = copy.deepcopy(values)
        convergence_delta = 0
        for position in range(0, length):
            policy_action = policy[position][0] - 1
            next_position = position_transfers[position][policy_action] - 1
            next_reward = rewards[position][policy_action]
            values[position] = next_reward + (discount_factor * values_new[next_position])
            convergence_delta = max(
                convergence_delta,
                utils.get_module(values[position] - values_new[position]))
    return values
def analyze(filepath):
    """
    Analyze the module pointed to by `filepath`
    """
    # Get the module as an AST node
    root = get_module(filepath)

    # Create the symbol table.
    # Symbol table creation must be a separate phase from dependency tree
    # creation, since Python does not evaluate, e.g., functions on parse;
    # therefore, entities can be used before being defined.
    # The alternative approach would be a single pass that resolves symbols as
    # soon as they become available; however, the existing solution is closer
    # to how Python works.
    symbol_table = create_symbol_table(root)
    print_symtable(symbol_table)

    # Find dependencies
    dependency_tree = create_dependency_tree(root, symbol_table)
def analyze(module_path):
    """
    Analyze dependencies starting at `module_path`
    """
    # View the module as an AST node object
    module = get_module(module_path)
    nodes = []
    NodeVisitor().visit(module, nodes)
    # Modify the main module node to give it a name attr
    if not hasattr(nodes[0][0], "name"):
        nodes[0][0].name = name_from_path(module_path)
    # symbolic_pretty_print(nodes)
    # pretty_print(nodes)
    # create_symbol_table(nodes[0])
    find_dependencies(nodes[0])
def run(module_id, requested_widget=None, generate_widget=True):
    module = utils.get_module(module_id)
    widgets = []
    if module is None:
        return
    if 'widgets' not in module:
        return
    for i in range(len(module["widgets"])):
        for j in range(len(module["widgets"][i])):
            # for each widget
            widget = module["widgets"][i][j]
            if requested_widget is not None and widget["widget_id"] != requested_widget:
                continue
            if not widget["enabled"]:
                continue
            # generate the widget
            if "layout" not in widget:
                continue
            for k in range(len(widget["layout"])):
                layout = widget["layout"][k]
                chart_generated = True
                if layout["type"] == "sensor_group_summary":
                    if generate_widget:
                        add_sensor_group_summary_chart(layout, widget)
                    break
                elif layout["type"] == "image":
                    if generate_widget:
                        add_sensor_image(layout, widget)
                    break
                elif layout["type"] == "sensor_group_timeline":
                    if generate_widget:
                        add_sensor_group_timeline_chart(layout, widget)
                    break
                elif layout["type"] == "chart_short" or layout["type"] == "chart_short_inverted":
                    if generate_widget:
                        add_sensor_chart(layout, widget)
                    break
                elif layout["type"] == "map":
                    if generate_widget:
                        add_sensor_map(layout, widget)
                    break
                else:
                    chart_generated = False
                    continue
            if chart_generated:
                widgets.append(widget["widget_id"])
    return widgets
def run(interp, line):
    line = line.strip()
    try:
        [(module, args)] = re.findall('([a-z][a-zA-Z_0-9]*)\((.*)\)$', line)
    except ValueError:
        print 'Error: failed to parse post command.'
        print
        return
    if module not in available:
        print 'did not recognize post-processor %r' % module
        return
    m = get_module('post', module)(interp)
    try:
        exec 'm.main(%s)' % args
    except SyntaxError as e:
        print 'Syntax error: %s' % e
        return
def __call__(self, parser, namespace, value, option_string=None):
    if namespace.init and namespace.create:
        print("Optional arguments --init and --create can't be used together")
        sys.exit(1)
    # Case where no argument is given
    if not namespace.init and not namespace.create:
        namespace.create = True
    if namespace.create:
        if not os.path.isdir(os.path.realpath(value)):
            print("{0} table project doesn't exist yet. \n \tpython myql-cli table -i {0} ".format(value))
            sys.exit(1)
        module_path = os.path.realpath(value)
        module = get_module(module_path)
        tables = [v for k, v in module.__dict__.items()
                  if isinstance(v, TableMeta) and k != 'TableModel']
        for table in tables:
            table_name = table.table.name
            path = os.path.realpath(value)
            table.table.save(name=table_name, path=path)
        sys.exit(0)
    if namespace.init:
        folder = value
        if not create_directory(folder):
            print("This project already exists !!!")
            sys.exit(0)
        create_init_file(folder)
        create_tables_file(folder)
        sys.exit(0)
    sys.exit(1)
def fit(self, X, y=None, fasta_path=None):
    """
    Parameters
    ----------
    X : array, (n_samples, 1)
        Contains the index numbers of the fasta sequences in the fasta file.
    y : array or list
        Target values.
    fasta_path : str
        File path to the fasta file.

    Returns
    -------
    self
    """
    if fasta_path:
        self.fasta_path = fasta_path
    if not self.fasta_path:
        raise ValueError("`fasta_path` can't be None!")
    pyfaidx = get_module('pyfaidx')
    fasta_file = pyfaidx.Fasta(self.fasta_path)
    # set up the sequence_length from the first entry
    sequence_length = len(fasta_file[int(X[0, 0])])
    if not self.padding:
        for idx in X[:, 0]:
            fasta_record = fasta_file[int(idx)]
            if len(fasta_record) != sequence_length:
                raise ValueError("The first sequence record contains "
                                 "%d bases, while %s contains %d bases"
                                 % (sequence_length, repr(fasta_record),
                                    len(fasta_record)))
    self.fasta_file = fasta_file
    self.sequence_length = sequence_length
    return self
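The `fit` above lazily imports `pyfaidx` and indexes FASTA records by integer position. A hedged usage sketch of that access pattern alone ('example.fa' is a placeholder path, and pyfaidx must be installed):

# Minimal sketch of the pyfaidx access pattern used by fit(); not the full transformer.
import numpy as np
import pyfaidx

fasta_file = pyfaidx.Fasta('example.fa')      # placeholder file name
X = np.array([[0], [1], [2]])                 # each row holds a record index
sequence_length = len(fasta_file[int(X[0, 0])])
print(sequence_length)                        # length of the first record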
def main(conf: str, seed: int, gpu_index: int, data_path: str):
    global DEVICE
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    logger.info(DEVICE)
    logger.info(conf)
    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = conf.sub_task_params.tau4vec.pretrain
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[1] end -----')
    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict()))
    # #########################################################################
    # #########################################################################
    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {f'{f}_{s}': []
                    for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'LOSS_1ST': {f'{f}_{s}': []
                         for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'LOSS_2ND': {f'{f}_{s}': []
                         for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'RATIO': {f'{f}_{s}': []
                      for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'ONLY_PT_RATIO': {f'{f}_{s}': []
                              for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
        }

    # evaluate only pre-train model
    loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                 set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
    loss_weight = [0.5, 0.5]
    metrics = get_module([MyMetrics], 'Calc_Auc')()
    from models.SPOS_NAS import SPOS
    model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
    model.to(DEVICE)
    logger.info('evaluate only pre-train model')
    dummy = make_output_dict()
    for now_choice in product(range(3), range(3)):
        pre_train_result = evaluate(model, conf, dataloader, metrics, dummy, now_choice)

    output_dict = make_output_dict()
    X_list = [0.0, 0.1, 0.5]
    for X in (np.array(X_list)).round(10):
        output_dict['X'].append(X)
        logger.info(f'loss_ratio: {X:.6f} (loss_1*X + loss_2*(1-X)) start')
        set_seed(conf.seed)

        def initialize_pretrain_weight():
            logger.info('load pretrain models...')
            for num_task, sub in enumerate(task):
                for num_model in range(len(sub)):
                    task[num_task][num_model].load_state_dict(
                        deepcopy(pre_trained_model[num_task][num_model].state_dict()))
            logger.info('load pretrain models done')

        logger.info('set model parameters...')
        loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                     set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
        loss_weight = [X, 1. - X]
        metrics = get_module([MyMetrics], 'Calc_Auc')()
        for now_choice in product(range(3), range(3)):
            initialize_pretrain_weight()
            model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
            model.to(DEVICE)
            optimizer = set_module([optim], sposnas_conf, 'optimizer',
                                   params=model.parameters())
            scheduler = set_module([optim.lr_scheduler], sposnas_conf, 'scheduler',
                                   optimizer=optimizer)
            logger.info('set model parameters done')
            logger.info('fit model...')
            model.fit(epochs=sposnas_conf.epochs, dataloader=dataloader,
                      device=DEVICE, optimizer=optimizer, scheduler=scheduler,
                      patience=sposnas_conf.patience, choice=now_choice)
            logger.info('fit model done')
            logger.info('eval model...')
            output_dict = evaluate(model, conf, dataloader, metrics, output_dict,
                                   now_choice)
            logger.info('eval model done')

    logger.info(f'seed: {conf.seed}/ pretrain result: {pre_train_result}')
    logger.info(f'seed: {conf.seed}/ final result: {output_dict}')
    logger.info('all train and eval step are done')
    logger.info('plot results...')
    logger.info('plot auc...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    import pandas as pd
    df = pd.DataFrame(output_dict['AUC'], index=output_dict['X'])
    df = df.rename(columns={f'{f}_{s}': f'{f}:{s}'
                            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)})
    df.plot()
    plt.xlabel('X')
    plt.ylabel('AUC')
    plt.savefig(f'grid_auc_{conf.seed}.png')
    plt.close()
    logger.info('plot loss_2ND...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['LOSS_2ND'], index=output_dict['X'])
    df = df.rename(columns={f'{f}_{s}': f'{f}:{s}'
                            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)})
    df.plot()
    plt.xlabel('X')
    plt.ylabel('LOSS_2ND')
    plt.savefig(f'grid_loss_2nd_{conf.seed}.png')
    plt.close()
    logger.info('plot loss_1ST...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['LOSS_1ST'], index=output_dict['X'])
    df = df.rename(columns={f'{f}_{s}': f'{f}:{s}'
                            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)})
    df.plot()
    plt.xlabel('X')
    plt.ylabel('LOSS_1ST')
    plt.savefig(f'grid_loss_1st_{conf.seed}.png')
    plt.close()
    logger.info('plot ratios...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['ONLY_PT_RATIO'], index=output_dict['X'])
    df = df.rename(columns={f'{f}_{s}': f'{f}:{s}'
                            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)})
    df.plot()
    plt.ylabel('ratio')
    plt.savefig(f'grid_only_pt_ratio_{conf.seed}.png')
    plt.close()
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['RATIO'], index=output_dict['X'])
    df = df.rename(columns={f'{f}_{s}': f'{f}:{s}'
                            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)})
    df.plot()
    plt.ylabel('ratio')
    plt.savefig(f'grid_ratio_{conf.seed}.png')
    plt.close()
    logger.info('plot results done')
import sys
from os.path import join

reload(sys)
sys.setdefaultencoding('utf-8')

from kivy.app import App
from kivy.utils import platform

import utils

try:
    user_data_dir = App.get_running_app().user_data_dir
    tar = join(user_data_dir, "swm.tgz")
    ini = join(user_data_dir, "swm.ini")
    swm_json = join(user_data_dir, "swm.json")
    swm_dir = join(user_data_dir, "swm")
    utils.get_module(None, None, None, str(platform), tar, ini, swm_json,
                     swm_dir, user_data_dir)
    # utils.get_module()
except:
    print "Unexpected error:", sys.exc_info()[0]
    from portal import PortalApp
    PortalApp().run()

user_data_dir = App.get_running_app().user_data_dir
sys.path.append(user_data_dir)
print "!!!!"
print user_data_dir
print "!!!!"
reload(sys)
from swm.main import SWMApp
SWMApp().run()
def get_network_module(name):
    return get_module('.' + name, package='overlay')
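This snippet, and the WaveNet trainer further down that calls `utils.get_module("wavenet." + FLAGS.config)`, pass dotted or relative module names, which suggests `get_module` wraps `importlib.import_module` in those code bases. A minimal stand-in under that assumption (the real helpers may add caching or error handling):

# Hedged sketch of a string-based module loader; '.vxlan' / 'overlay' are hypothetical names.
import importlib


def get_module(name, package=None):
    return importlib.import_module(name, package=package)


# network_module = get_module('.vxlan', package='overlay')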
def main(conf: str, seed: int, gpu_index: int, data_path: str, event: int):
    global DEVICE, FIRST_MODEL_NAME, SECOND_MODEL_NAME, MODELNAME_CHOICE_INDEX
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    if event is not None:
        conf['dataset']['params']['max_events'] = event
    logger.info(DEVICE)
    logger.info(conf)
    FIRST_MODEL_NAME = [i['name'].split('_')[-1][:-4] + f'-{num}'
                        for num, i in enumerate(conf.sub_task_params.tau4vec.tasks)]
    SECOND_MODEL_NAME = [i['name'].split('_')[-1][:-4] + f'-{num}'
                         for num, i in enumerate(conf.sub_task_params.higgsId.tasks)]
    MODELNAME_CHOICE_INDEX = {
        f'{n1}_{n2}': v
        for (n1, n2), v in zip(product(FIRST_MODEL_NAME, SECOND_MODEL_NAME),
                               product(range(len(FIRST_MODEL_NAME)),
                                       range(len(SECOND_MODEL_NAME))))
    }
    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = conf.sub_task_params.tau4vec.pretrain
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[1] end -----')
    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict()))
    # #########################################################################
    # #########################################################################
    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {f'{f}_{s}': []
                    for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'LOSS_1ST': {f: [] for f in FIRST_MODEL_NAME},
            'LOSS_2ND': {f'{f}_{s}': []
                         for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'RATIO': {f: [] for f in FIRST_MODEL_NAME},
            'ONLY_PT_RATIO': {f: [] for f in FIRST_MODEL_NAME},
        }

    # evaluate only pre-train model
    loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                 set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
    loss_weight = [0.5, 0.5]
    metrics = get_module([MyMetrics], 'Calc_Auc')()
    from models.SPOS_NAS import SPOS
    model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
    model.to(DEVICE)
    logger.info('evaluate only pre-train model')
    dummy = make_output_dict()
    evaluate(model, conf, dataloader, metrics, dummy)

    output_dict = make_output_dict()
    X_list = [i for i in range(11)]
    X_list[1:1] = [0.01, 0.1]
    X_list[-1:-1] = [9.9, 9.99]
    for X in (np.array(X_list) * 0.1).round(10):
        output_dict['X'].append(X)
        logger.info(f'loss_ratio: {X:.6f} (loss_1*X + loss_2*(1-X)) start')
        set_seed(conf.seed)
        logger.info('load pretrain models...')
        for num_task, sub in enumerate(task):
            for num_model in range(len(sub)):
                task[num_task][num_model].load_state_dict(
                    deepcopy(pre_trained_model[num_task][num_model].state_dict()))
        logger.info('load pretrain models done')
        logger.info('set model parameters...')
        loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                     set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
        loss_weight = [X, 1. - X]
        metrics = get_module([MyMetrics], 'Calc_Auc')()
        model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight,
                     save_dir='SPOS')
        model.to(DEVICE)
        optimizer = set_module([optim], sposnas_conf, 'optimizer',
                               params=model.parameters())
        scheduler = set_module([optim.lr_scheduler], sposnas_conf, 'scheduler',
                               optimizer=optimizer)
        logger.info('set model parameters done')
        logger.info('fit model...')
        model.fit(epochs=sposnas_conf.epochs, dataloader=dataloader, device=DEVICE,
                  optimizer=optimizer, scheduler=scheduler,
                  patience=sposnas_conf.patience)
        logger.info('fit model done')
        logger.info('eval model...')
        output_dict = evaluate(model, conf, dataloader, metrics, output_dict)
        logger.info('eval model done')

        set_seed(conf.seed)
        logger.info('re-train start')
        selected_model, _ = max(
            {k: v[-1] for k, v in output_dict['AUC'].items()}.items(),
            key=lambda x: x[1])
        logger.info(f'selected_model: {selected_model}')
        selected_choice = MODELNAME_CHOICE_INDEX[selected_model]
        model.fit(epochs=sposnas_conf.epochs, dataloader=dataloader, device=DEVICE,
                  optimizer=optimizer, scheduler=scheduler,
                  patience=sposnas_conf.patience, choice=selected_choice)
        logger.info('re-train done')
        dummy = None
        dummy = make_output_dict()
        dummy = evaluate(model, conf, dataloader, metrics, dummy)

        def result_parser(res, selected_model, seed, X):
            AUC = res['AUC'][selected_model][0]
            LOSS_1ST = res['LOSS_1ST'][selected_model.split('_')[0]][0]
            LOSS_2ND = res['LOSS_2ND'][selected_model][0]
            RATIO = res['RATIO'][selected_model.split('_')[0]][0]
            ONLY_PT_RATIO = res['ONLY_PT_RATIO'][selected_model.split('_')[0]][0]
            target_result = dict(seed=seed, X=X, AUC=AUC, LOSS_1ST=LOSS_1ST,
                                 LOSS_2ND=LOSS_2ND, RATIO=RATIO,
                                 ONLY_PT_RATIO=ONLY_PT_RATIO)
            logger.info(f're-train results: {target_result}')

        result_parser(dummy, selected_model, conf.seed, X)

    logger.info('all train and eval step are done')
    logger.info('plot results done')
def train(model, optimizer, train_dataset, args, checkpoint_data,
          dev_dataset=None, unk_dataset=None):
    """ Train model.

    Args:
    - model: BertModelForLangID
    - optimizer
    - train_dataset: BertDatasetForClassification
    - args
    - checkpoint_data: dict
    - dev_dataset: (optional) BertDatasetForTesting for dev data
      (required if args.eval_during_training)
    - unk_dataset: (optional) BertDatasetForMLM for unlabeled data

    Returns: None

    """
    assert type(train_dataset) == BertDatasetForClassification
    if args.eval_during_training:
        assert dev_dataset is not None
        assert type(dev_dataset) == BertDatasetForTesting
    if unk_dataset is not None:
        assert type(unk_dataset) == BertDatasetForMLM

    # Where do we save stuff?
    save_to_dir = args.dir_pretrained_model if args.resume else args.dir_output

    # Prepare path of training log
    time_str = datetime.now().strftime("%Y%m%d%H%M%S")
    train_log_name = "%s.%strain.log" % (time_str, "resume." if args.resume else "")
    train_log_path = os.path.join(save_to_dir, train_log_name)

    # Write header in log. We create a new log whether we are fine-tuning from
    # a pre-trained model or resuming a fine-tuning job.
    header = "GlobalStep\tLossLangID\tAccuracyLangID"
    if not args.no_mlm:
        header += "\tLossMLM\tAccuracyMLM"
        if unk_dataset is not None:
            header += "\tLossUnkMLM\tAccuracyUnkMLM"
    header += "\tGradNorm\tWeightNorm"
    if args.eval_during_training:
        header += "\tDevLoss\tDevF1Track1\tDevF1Track2\tDevF1Track3"
    with open(train_log_path, "w") as f:
        f.write(header + "\n")

    # Make dataloader(s). Note: since BertDatasetForTraining and its
    # subclasses are IterableDatasets (i.e. streams), the loader is an
    # iterable (with no end and no __len__) that we call with iter().
    train_dataloader = get_dataloader(train_dataset, args.train_batch_size, args.local_rank)
    train_batch_sampler = iter(train_dataloader)
    if unk_dataset is not None:
        unk_dataloader = get_dataloader(unk_dataset, args.train_batch_size, args.local_rank)
        unk_batch_enum = enumerate(iter(unk_dataloader))

    # Initialize best score
    if not args.resume:
        checkpoint_data["best_score"] = 0
    if args.resume:
        # This should not happen. I added it in between versions...
        if "best_score" not in checkpoint_data:
            checkpoint_data["best_score"] = 0

    # Evaluate model on dev set
    if args.eval_during_training:
        logger.info("Evaluating model on dev set before we %s training"
                    % ("resume" if args.resume else "start"))
        dev_scores = evaluate(model, dev_dataset, args)
        best_score = dev_scores[args.score_to_optimize]
        if args.resume:
            if best_score > checkpoint_data["best_score"]:
                checkpoint_data["best_score"] = best_score
                model_to_save = get_module(model)
                checkpoint_data['best_model_state_dict'] = deepcopy(
                    model_to_save.state_dict())
        else:
            checkpoint_data["best_score"] = best_score
        log_data = []
        log_data.append(str(checkpoint_data["global_step"]))
        log_data += ["", ""]
        if not args.no_mlm:
            log_data += ["", ""]
            if unk_dataset is not None:
                log_data += ["", ""]
        log_data += ["", ""]
        log_data.append("{:.5f}".format(dev_scores["loss"]))
        log_data.append("{:.5f}".format(dev_scores["track1"]))
        log_data.append("{:.5f}".format(dev_scores["track2"]))
        log_data.append("{:.5f}".format(dev_scores["track3"]))
        with open(train_log_path, "a") as f:
            f.write("\t".join(log_data) + "\n")

    # Start training
    logger.info("***** Running training *****")
    for epoch in trange(int(args.num_epochs), desc="Epoch"):
        model.train()
        # Some stats for this epoch
        real_batch_sizes = []
        lid_losses = []
        lid_accs = []
        mlm_losses = []
        mlm_accs = []
        unk_mlm_losses = []
        unk_mlm_accs = []
        grad_norms = []

        # Run training for one epoch
        for step in trange(int(args.num_train_steps_per_epoch), desc="Iteration"):
            batch = next(train_batch_sampler)
            batch = tuple(t.to(args.device) for t in batch)
            input_ids = batch[0]
            input_mask = batch[1]
            segment_ids = batch[2]
            label_ids = batch[3]
            masked_input_ids = batch[4]
            lm_label_ids = batch[5]
            real_batch_sizes.append(len(input_ids))
            lid_scores = model(input_ids=input_ids, input_mask=input_mask,
                               segment_ids=segment_ids)
            if not args.no_mlm:
                # Call BERT encoder to get encoding of masked input sequences
                mlm_outputs = model.encoder.bert(input_ids=masked_input_ids,
                                                 attention_mask=input_mask,
                                                 token_type_ids=segment_ids,
                                                 position_ids=None)
                mlm_last_hidden_states = mlm_outputs[0]
                # Do MLM on last hidden states
                mlm_pred_scores = model.encoder.cls(mlm_last_hidden_states)
                # Do MLM on unk_dataset if present
                if unk_dataset is not None:
                    unk_batch_id, unk_batch = next(unk_batch_enum)
                    # Make sure the training steps are synced
                    assert unk_batch_id == step
                    unk_batch = tuple(t.to(args.device) for t in unk_batch)
                    xinput_ids, xinput_mask, xsegment_ids, xlm_label_ids = unk_batch
                    # Make sure the batch sizes are equal
                    assert len(xinput_ids) == len(input_ids)
                    unk_mlm_outputs = model.encoder.bert(input_ids=xinput_ids,
                                                         attention_mask=xinput_mask,
                                                         token_type_ids=xsegment_ids,
                                                         position_ids=None)
                    unk_last_hidden_states = unk_mlm_outputs[0]
                    unk_mlm_pred_scores = model.encoder.cls(unk_last_hidden_states)

            # Compute loss, do backprop. Compute accuracies.
            loss_fct = CrossEntropyLoss(reduction="mean")
            loss = loss_fct(lid_scores, label_ids)
            lid_losses.append(loss.item())
            if not args.no_mlm:
                mlm_loss = loss_fct(
                    mlm_pred_scores.view(-1, model.encoder.config.vocab_size),
                    lm_label_ids.view(-1))
                mlm_losses.append(mlm_loss.item())
                loss = loss + mlm_loss
                if unk_dataset is not None:
                    unk_mlm_loss = loss_fct(
                        unk_mlm_pred_scores.view(-1, model.encoder.config.vocab_size),
                        xlm_label_ids.view(-1))
                    loss = loss + unk_mlm_loss
                    unk_mlm_losses.append(unk_mlm_loss.item())

            # Backprop
            loss = adjust_loss(loss, args)
            loss.backward()

            # Compute norm of gradient
            training_grad_norm = 0
            for param in model.parameters():
                if param.grad is not None:
                    training_grad_norm += torch.norm(param.grad, p=2).item()
            grad_norms.append(training_grad_norm)

            # Compute accuracies
            lid_acc = accuracy(lid_scores, label_ids)
            lid_accs.append(lid_acc)
            if not args.no_mlm:
                mlm_acc = accuracy(
                    mlm_pred_scores.view(-1, model.encoder.config.vocab_size),
                    lm_label_ids.view(-1), ignore_label=NO_MASK_LABEL)
                mlm_accs.append(mlm_acc)
                if unk_dataset is not None:
                    unk_mlm_acc = accuracy(
                        unk_mlm_pred_scores.view(-1, model.encoder.config.vocab_size),
                        xlm_label_ids.view(-1), ignore_label=NO_MASK_LABEL)
                    unk_mlm_accs.append(unk_mlm_acc)

            # Check if we accumulate grad or do an optimization step
            if (step + 1) % args.grad_accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                checkpoint_data["global_step"] += 1
                if checkpoint_data["global_step"] >= checkpoint_data["max_opt_steps"]:
                    break

        # Compute stats for this epoch
        last_grad_norm = grad_norms[-1]
        avg_lid_loss = weighted_avg(lid_losses, real_batch_sizes)
        avg_lid_acc = weighted_avg(lid_accs, real_batch_sizes)
        if not args.no_mlm:
            avg_mlm_loss = weighted_avg(mlm_losses, real_batch_sizes)
            avg_mlm_acc = weighted_avg(mlm_accs, real_batch_sizes)
            if unk_dataset is not None:
                avg_unk_mlm_loss = weighted_avg(unk_mlm_losses, real_batch_sizes)
                avg_unk_mlm_acc = weighted_avg(unk_mlm_accs, real_batch_sizes)

        # Compute norm of model weights
        weight_norm = 0
        for param in model.parameters():
            weight_norm += torch.norm(param.data, p=2).item()

        # Evaluate model on dev set
        if args.eval_during_training:
            dev_scores = evaluate(model, dev_dataset, args)

        # Write stats for this epoch in log
        log_data = []
        log_data.append(str(checkpoint_data["global_step"]))
        log_data.append("{:.5f}".format(avg_lid_loss))
        log_data.append("{:.5f}".format(avg_lid_acc))
        if not args.no_mlm:
            log_data.append("{:.5f}".format(avg_mlm_loss))
            log_data.append("{:.5f}".format(avg_mlm_acc))
            if unk_dataset is not None:
                log_data.append("{:.5f}".format(avg_unk_mlm_loss))
                log_data.append("{:.5f}".format(avg_unk_mlm_acc))
        log_data.append("{:.5f}".format(last_grad_norm))
        log_data.append("{:.5f}".format(weight_norm))
        if args.eval_during_training:
            log_data.append("{:.5f}".format(dev_scores["loss"]))
            log_data.append("{:.5f}".format(dev_scores["track1"]))
            log_data.append("{:.5f}".format(dev_scores["track2"]))
            log_data.append("{:.5f}".format(dev_scores["track3"]))
        with open(train_log_path, "a") as f:
            f.write("\t".join(log_data) + "\n")

        # Save best model in checkpoint if score has improved
        save = True
        if args.eval_during_training:
            current_score = dev_scores[args.score_to_optimize]
            if current_score > best_score:
                best_score = current_score
                checkpoint_data["best_score"] = best_score
                model_to_save = get_module(model)
                checkpoint_data['best_model_state_dict'] = deepcopy(
                    model_to_save.state_dict())

        # Save datasets in case we need to resume later
        train_dataset.close_files()
        checkpoint_data["train_dataset"] = train_dataset
        if unk_dataset is not None:
            unk_dataset.close_files()
            checkpoint_data["unk_dataset"] = unk_dataset
        if dev_dataset is not None:
            checkpoint_data["dev_dataset"] = dev_dataset

        # Save checkpoint
        model_to_save = get_module(model)
        checkpoint_data['model_state_dict'] = model_to_save.state_dict()
        checkpoint_data['optimizer_state_dict'] = optimizer.state_dict()
        checkpoint_path = os.path.join(save_to_dir, "checkpoint.tar")
        logger.info("Saving checkpoint")
        torch.save(checkpoint_data, checkpoint_path)

        # Reload datasets we had to close
        train_dataset.prep_files_for_streaming()
        if unk_dataset is not None:
            unk_dataset.prep_files_for_streaming()
#!/usr/bin/python
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

import utils

try:
    utils.get_module()
except:
    from portal import PortalApp
    PortalApp().run()

from swm.main import SWMApp
SWMApp().run()
def main(conf: str, seed: int, gpu_index: int, data_path: str, event: int,
         weight: float, n_times_model: int, prefix: str, is_gp_3dim: bool):
    global DEVICE, FIRST_MODEL_NAME, SECOND_MODEL_NAME, MODELNAME_CHOICE_INDEX
    start = time.time()
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        # WARNING: Enable gpu_re_index dict in gpu02 only
        gpu_re_index = {0: 0, 1: 1, 2: 4, 3: 5, 4: 2, 5: 3, 6: 6, 7: 7}
        gpu_index = gpu_re_index[gpu_index]
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    if event is not None:
        conf['dataset']['params']['max_events'] = event
    conf['is_gp_3dim'] = is_gp_3dim
    logger.info(DEVICE)
    logger.info(conf)
    model_confs_tau4vec = conf.sub_task_params.tau4vec
    model_confs_tau4vec['tasks'] = model_confs_tau4vec['tasks'] * n_times_model
    model_confs_higgsId = conf.sub_task_params.higgsId
    model_confs_higgsId['tasks'] = model_confs_higgsId['tasks'] * n_times_model
    sub_models_conf = {
        'tau4vec': model_confs_tau4vec,
        'higgsId': model_confs_higgsId
    }
    FIRST_MODEL_NAME = [i['name'].split('_')[-1][:-4] + f'-{num}'
                        for num, i in enumerate(model_confs_tau4vec['tasks'])]
    SECOND_MODEL_NAME = [i['name'].split('_')[-1][:-4] + f'-{num}'
                         for num, i in enumerate(model_confs_higgsId['tasks'])]
    MODELNAME_CHOICE_INDEX = {
        f'{n1}_{n2}': v
        for (n1, n2), v in zip(product(FIRST_MODEL_NAME, SECOND_MODEL_NAME),
                               product(range(len(FIRST_MODEL_NAME)),
                                       range(len(SECOND_MODEL_NAME))))
    }
    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(sub_models_conf, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(sub_models_conf, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = model_confs_tau4vec['pretrain']
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf['data']['input_key']
        target_key = pretrain_conf['data']['target_key']
        patience = pretrain_conf['patience']
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf['epochs'],
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim], pretrain_conf, 'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf['data']['input_key']
        target_key = pretrain_conf['data']['target_key']
        patience = pretrain_conf['patience']
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf['epochs'],
                                        model=sub_model, dataloader=dataloader,
                                        optimizer=optimizer, loss_func=loss_func,
                                        input_key=input_key, target_key=target_key,
                                        device=DEVICE, patience=patience,
                                        metrics=metrics, activation=activation)
    logger.info('----- pretrain[1] end -----')
    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(sub_models_conf, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(sub_models_conf, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict()))
    # #########################################################################
    # #########################################################################
    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {f'{f}_{s}': []
                    for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'LOSS_1ST': {f: [] for f in FIRST_MODEL_NAME},
            'LOSS_2ND': {f'{f}_{s}': []
                         for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)},
            'RATIO': {f: [] for f in FIRST_MODEL_NAME},
            'ONLY_PT_RATIO': {f: [] for f in FIRST_MODEL_NAME},
        }

    # SPOS-NAS
    loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                 set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
    loss_weight = [weight, 1. - weight]
    metrics = get_module([MyMetrics], 'Calc_Auc')()
    model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
    model.to(DEVICE)
    output_dict = make_output_dict()
    output_dict['X'].append(weight)
    logger.info(f'loss_ratio: {weight:.6f} (loss_1*X + loss_2*(1-X)) start')
    set_seed(conf.seed)
    logger.info('load pretrain models...')
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            task[num_task][num_model].load_state_dict(
                deepcopy(pre_trained_model[num_task][num_model].state_dict()))
    logger.info('load pretrain models done')
    logger.info('set model parameters...')
    optimizer = set_module([optim], sposnas_conf, 'optimizer',
                           params=model.parameters())
    scheduler = set_module([optim.lr_scheduler], sposnas_conf, 'scheduler',
                           optimizer=optimizer)
    logger.info('set model parameters done')
    logger.info('fit model...')
    model.fit(epochs=sposnas_conf.epochs, dataloader=dataloader, device=DEVICE,
              optimizer=optimizer, scheduler=scheduler,
              patience=sposnas_conf.patience)
    logger.info('fit model done')
    logger.info('eval model...')
    output_dict = evaluate(model, conf, dataloader, metrics, output_dict, is_gp_3dim)
    logger.info('eval model done')
    set_seed(conf.seed)
    logger.info('re-train start')
    selected_model, _ = max(
        {k: v[-1] for k, v in output_dict['AUC'].items()}.items(),
        key=lambda x: x[1])
    logger.info(f'selected_model: {selected_model}')
    selected_choice = MODELNAME_CHOICE_INDEX[selected_model]
    model.fit(epochs=sposnas_conf.epochs, dataloader=dataloader, device=DEVICE,
              optimizer=optimizer, scheduler=scheduler,
              patience=sposnas_conf.patience, choice=selected_choice)
    logger.info('re-train done')
    elapsed_time = time.time() - start
    events = conf.dataset.params.max_events * 2
    if prefix:
        output_file = (f'result.SPOS_NAS-{prefix}_'
                       + f's{seed}_w{weight}_e{events}_'
                       + f'n{n_times_model*3}.json')
    else:
        output_file = (f'result.SPOS_NAS-s{seed}_w{weight}_e{events}_'
                       + f'n{n_times_model*3}.json')
    with open(os.path.join('logs', output_file), 'w') as fo:
        json.dump(
            [{
                'agent': 'SPOS-NAS',
                'tasks': {
                    'tau4vec': {
                        'weight': weight,
                        'loss_test': -1,
                        'mse_test': -1,
                        'ratio_2sigma_GP_test': -1,
                        'models': FIRST_MODEL_NAME,
                        'model_selected': selected_model.split('_')[0]
                    },
                    'higgsId': {
                        'weight': 1. - weight,
                        'loss_test': -1,
                        'auc_test': -1,
                        'models': SECOND_MODEL_NAME,
                        'model_selected': selected_model.split('_')[1]
                    }
                },
                'loss_test': -1,
                'nevents': conf.dataset.params.max_events * 2,
                'seed': conf.seed,
                'walltime': elapsed_time
            }],
            fo, indent=2)
    dummy = make_output_dict()
    dummy = evaluate(model, conf, dataloader, metrics, dummy, is_gp_3dim)

    def result_parser(res, selected_model, seed, time):
        AUC = res['AUC'][selected_model][0]
        LOSS_1ST = res['LOSS_1ST'][selected_model.split('_')[0]][0]
        LOSS_2ND = res['LOSS_2ND'][selected_model][0]
        RATIO = res['RATIO'][selected_model.split('_')[0]][0]
        ONLY_PT_RATIO = res['ONLY_PT_RATIO'][selected_model.split('_')[0]][0]
        target_result = dict(seed=seed, AUC=AUC, LOSS_1ST=LOSS_1ST,
                             LOSS_2ND=LOSS_2ND, RATIO=RATIO,
                             ONLY_PT_RATIO=ONLY_PT_RATIO)
        logger.info(f're-train results: {target_result}')
        return {
            'agent': 'SPOS-NAS',
            'tasks': {
                'tau4vec': {
                    'weight': weight,
                    'loss_test': target_result['LOSS_1ST'],
                    'mse_test': target_result['LOSS_1ST'] * 10000,
                    'ratio_2sigma_GP_test': target_result['RATIO'],
                    'models': FIRST_MODEL_NAME,
                    'model_selected': selected_model.split('_')[0]
                },
                'higgsId': {
                    'weight': 1. - weight,
                    'loss_test': target_result['LOSS_2ND'],
                    'auc_test': target_result['AUC'],
                    'models': SECOND_MODEL_NAME,
                    'model_selected': selected_model.split('_')[1]
                }
            },
            'loss_test': (weight * target_result['LOSS_1ST']
                          + (1. - weight) * target_result['LOSS_2ND']),
            'nevents': conf.dataset.params.max_events * 2,
            'seed': seed,
            'walltime': time
        }

    with open(os.path.join('logs', output_file), 'w') as fo:
        json.dump([result_parser(dummy, selected_model, conf.seed, elapsed_time)],
                  fo, indent=2)
    logger.info('all train and eval step are done')
def fasta_file(self):
    return get_module('pyfaidx').Fasta(self.fasta_path)
def main(unused_argv=None):
    tf.logging.set_verbosity(FLAGS.log)
    if FLAGS.config is None:
        raise RuntimeError("No config name specified.")
    config = utils.get_module("wavenet." + FLAGS.config).Config(FLAGS.train_path)
    logdir = FLAGS.logdir
    tf.logging.info("Saving to %s" % logdir)
    with tf.Graph().as_default():
        total_batch_size = FLAGS.total_batch_size
        assert total_batch_size % FLAGS.worker_replicas == 0
        worker_batch_size = total_batch_size / FLAGS.worker_replicas
        # Run the Reader on the CPU
        cpu_device = "/job:localhost/replica:0/task:0/cpu:0"
        if FLAGS.ps_tasks:
            cpu_device = "/job:worker/cpu:0"
        with tf.device(cpu_device):
            inputs_dict = config.get_batch(worker_batch_size)
        with tf.device(tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks,
                                                      merge_devices=True)):
            global_step = tf.get_variable(
                "global_step", [], tf.int32,
                initializer=tf.constant_initializer(0), trainable=False)
            # pylint: disable=cell-var-from-loop
            lr = tf.constant(config.learning_rate_schedule[0])
            for key, value in config.learning_rate_schedule.iteritems():
                lr = tf.cond(tf.less(global_step, key), lambda: lr,
                             lambda: tf.constant(value))
            # pylint: enable=cell-var-from-loop
            tf.summary.scalar("learning_rate", lr)
            # build the model graph
            outputs_dict = config.build(inputs_dict, is_training=True)
            loss = outputs_dict["loss"]
            tf.summary.scalar("train_loss", loss)
            worker_replicas = FLAGS.worker_replicas
            ema = tf.train.ExponentialMovingAverage(decay=0.9999,
                                                    num_updates=global_step)
            opt = tf.train.SyncReplicasOptimizer(
                tf.train.AdamOptimizer(lr, epsilon=1e-8), worker_replicas,
                total_num_replicas=worker_replicas, variable_averages=ema,
                variables_to_average=tf.trainable_variables())
            train_op = opt.minimize(loss, global_step=global_step, name="train",
                                    colocate_gradients_with_ops=True)
            session_config = tf.ConfigProto(allow_soft_placement=True)
            is_chief = (FLAGS.task == 0)
            local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op
            slim.learning.train(
                train_op=train_op,
                logdir=logdir,
                is_chief=is_chief,
                master=FLAGS.master,
                number_of_steps=config.num_iters,
                global_step=global_step,
                log_every_n_steps=250,
                local_init_op=local_init_op,
                save_interval_secs=300,
                sync_optimizer=opt,
                session_config=session_config,
            )
import sgmllib
import urllib
import random
import re

import const
import utils

module = utils.get_module()
exec(utils.get_import(mod=module, from_=['utils']))
try:
    exec(utils.get_import(mod=module, from_=['mounts'],
                          import_=['HookMount', 'CommandMount']))
except ImportError, e:
    print e


class UrlParser(sgmllib.SGMLParser):
    "A simple parser class."

    def parse(self, s):
        "Parse the given string 's'."
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        "Initialise an object, passing 'verbose' to the superclass."
        sgmllib.SGMLParser.__init__(self, verbose)
        self.hyperlinks = []
        self.descriptions = []
def get_module_view(request):
    id = request.GET.get("module_id")
    ret = utils.get_module(id)
    return public.success_result_http(ret)