def train_model(config_path: Text):
    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('train')
    logger = get_logger(name='TRAIN MODEL',
                        loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug('Start training...')

    estimator_name = config['estimator_name']
    param_grid = config['estimators'][estimator_name]['param_grid']
    cv = config['cv']
    target_column = pipeline_config['dataset_build']['target_column']
    train_df = get_dataset(pipeline_config['split_train_test']['train_csv'])

    model = train(
        df=train_df,
        target_column=target_column,
        estimator_name=estimator_name,
        param_grid=param_grid,
        cv=cv
    )
    logger.debug(f'Best score: {model.best_score_}')

    model_name = pipeline_config['base']['experiments']['model_name']
    models_folder = pipeline_config['base']['experiments']['models_folder']
    joblib.dump(model, os.path.join(models_folder, model_name))
    logger.debug(f'Save model to {os.path.join(models_folder, model_name)}')

def __init__(self, config):
    # Configurations
    self.config = config
    self.tables_config = self.config.get("tables", dict())

    # Sleeper configurations
    self.sleeper_base_url = self.config.get("sleeper_base_url")

    # ESPN configurations
    self.espn_start_year = self.config.get("espn_start_year", dict())
    self.espn_end_year = self.config.get("espn_end_year", dict())

    # Command line arguments
    self.args = parse_args(self.config["args"])
    self.league_name = self.args["league_name"]
    self.sleeper_season_id = self.args["sleeper_season_id"]
    self.gcs_bucket = self.args["gcs_bucket"]
    self.gbq_project = self.args["gbq_project"]
    self.gbq_dataset = self.args["gbq_dataset"]
    self.espn_league_id = self.args.get("espn_league_id")
    self.espn_s2 = self.args.get("espn_s2")
    self.espn_swid = self.args.get("espn_swid")

    self.logger = get_logger("Fantasy Football Stats", level=self.args["log_level"])

    # Create GCP clients
    self.gcs_client = storage.Client()
    self.gbq_client = bigquery.Client()

def create_features(self):
    logger = get_logger(self.__class__.__name__)
    with timer(logger, 'loading data'):
        train = pd.read_csv(DATA_DIR / TRAIN_NAME)
        test = pd.read_csv(DATA_DIR / TEST_NAME)
        spectrum_stack = pd.read_csv(DATA_DIR / "spectrum_stack.csv")

    with timer(logger, 'get intensity features'):
        clf = TSNE(n_components=3)
        X = spectrum_stack.drop(columns=["spectrum_filename"]).values
        print(X.shape)
        z = clf.fit_transform(X)
        result = pd.DataFrame(
            z, columns=[f"tsne_{i}" for i in range(z.shape[1])])
        fe_cols = result.columns
        result["spectrum_filename"] = spectrum_stack["spectrum_filename"].values

        train = pd.merge(train, result, on="spectrum_filename", how="left")
        test = pd.merge(test, result, on="spectrum_filename", how="left")

        self.train_feature = train[fe_cols]
        self.test_feature = test[fe_cols]

def create_features(self):
    logger = get_logger(self.__class__.__name__)
    with timer(logger, 'loading data'):
        train = pd.read_csv(DATA_DIR / TRAIN_NAME)
        test = pd.read_csv(DATA_DIR / TEST_NAME)
        spectrum = pd.read_csv(DATA_DIR / "spectrum.csv")

    with timer(logger, 'get intensity features'):
        spectrum["intensity_diff"] = np.abs(
            spectrum.groupby(["spectrum_filename"])["intensity"].diff().values)
        agg = spectrum.groupby("spectrum_filename")["intensity_diff"].agg(STATS)
        agg_cols = agg.columns

        train = pd.merge(train, agg.reset_index(), on="spectrum_filename", how="left")
        test = pd.merge(test, agg.reset_index(), on="spectrum_filename", how="left")

        self.train_feature = train[agg_cols]
        self.test_feature = test[agg_cols]

class FundsExplorerSpider(scrapy.Spider):
    name = 'Funds Explorer Spider'
    logger = get_logger('Funds Explorer')
    allowed_domains = ['fundsexplorer.com.br']
    start_urls = [
        'https://fundsexplorer.com.br/ranking'
    ]
    custom_settings = {
        'ITEM_PIPELINES': {
            'src.scrawlers.pipelines.JsonWriterPipeline': 900
        }
    }

    def parse(self, response):
        self.logger.info('Response arrived!')
        rows = response.xpath('//*[@id="table-ranking"]//tbody//tr')
        self.logger.info(f'Found {len(rows)} rows')
        for row in rows:
            item = FIIs()
            item['code'] = row.xpath('td[1]/a/text()').get()
            yield item

def split_dataset(config_path: Text):
    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('split_train_test')
    logger = get_logger(name='SPLIT_TRAIN_TEST',
                        loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug('Start split_dataset')

    dataset = get_dataset(pipeline_config['dataset_build']['dataset_csv'])
    target_column = pipeline_config['dataset_build']['target_column']
    random_state = pipeline_config['base']['random_state']
    test_size = config['test_size']
    train_csv_path = config['train_csv']
    test_csv_path = config['test_csv']
    logger.debug(f'Test size: {test_size}')

    dataset = transform_targets_to_numerics(dataset, target_column=target_column)
    train_dataset, test_dataset = split_dataset_in_train_test(
        dataset, test_size=test_size, random_state=random_state)

    train_dataset.to_csv(train_csv_path, index=False)
    test_dataset.to_csv(test_csv_path, index=False)
    logger.debug(f'Train dataset shape: {train_dataset.shape}')
    logger.debug(f'Save train dataset to {train_csv_path}')
    logger.debug(f'Test dataset shape: {test_dataset.shape}')
    logger.debug(f'Save test dataset to {test_csv_path}')

def __init__(self, quantity):
    self.headers = HEADERS
    self.scroll_header = SCROLL_HEADERS
    self.params = PARAMS
    self.view_id = None
    self.session_id = None
    self.data = None
    self.quantity = quantity
    self.logger = get_logger('KazPatentCrawler')

def launch_qsub(job_type, mode,
                n_iter, n_gpu, devices, save_interval,  # tuning args
                model_path, ms, scale, batch_size, splits,  # predict args
                n_blocks, instance_type):
    """
    python v17.py launch-qsub predict -m v17/ep --ms --scale L --batch-size 26 --splits index,test --n-blocks 32
    python v7c.py launch-qsub tuning -d 0,1,2,3 --n-gpu 4 --n-blocks 2 -s 1 --instance-type rt_F
    """
    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    logger = utils.get_logger(log_dir=exp_path)

    job_ids = []
    for block_id in range(n_blocks):
        if job_type == 'tuning':
            cmd_with_args = [
                "python", "-W", "ignore", __file__, "tuning",
                "--mode", mode,
                "--n-iter", str(n_iter),
                "--n-gpu", str(n_gpu),
                "--devices", devices,
                "--save-interval", str(save_interval),
                "--n-blocks", str(n_blocks),
                "--block-id", str(block_id),
            ]
            n_hours = 72
        elif job_type == 'predict':
            cmd_with_args = [
                "python", "-W", "ignore", __file__, "predict",
                "-m", model_path,
                "--splits", splits,
                "--scale", scale,
                "--batch-size", str(batch_size),
                "--n-blocks", str(n_blocks),
                "--block-id", str(block_id),
            ]
            n_hours = 18
            if ms:
                cmd_with_args.append("--ms")
        else:
            raise ValueError('job-type should be one of "tuning" or "predict"')

        proc = qsub.qsub(cmd_with_args,
                         n_hours=n_hours,
                         instance_type=instance_type,
                         logger=logger)
        logger.info(f'Response from qsub: {proc.returncode}')

        m = re.match(r'Your job (\d+) \(', proc.stdout.decode('utf8'))
        job_id = int(m.group(1)) if m is not None else None
        logger.info(f'Job id: {job_id}')
        assert job_id is not None

        job_ids.append(job_id)
        time.sleep(1)

    qsub.monitor_jobs(job_ids, logger)

def create_features(self):
    logger = get_logger(__name__)
    with timer(logger, 'loading data'):
        train = pd.read_csv(DATA_DIR / TRAIN_NAME)
        test = pd.read_csv(DATA_DIR / TEST_NAME)

    with timer(logger, 'label encoding'):
        for col in categorical_features:
            train_result, test_result = label_encoding(col, train, test)
            self.train_feature[col] = train_result
            self.test_feature[col] = test_result

def __init__(
    self,
    model,
    pruning_param_names,
    final_rate=0.1,
    pruning_iter=1,
    prune_once=None,
):
    super().__init__()
    assert pruning_iter >= 0
    self.final_rate = final_rate
    self.pruning_iter = pruning_iter
    prune_once = prune_once or prune_by_percent_once
    self.pruning_names = set(pruning_param_names)

    self._log = utils.get_logger(__name__)
    self._log.info(self.pruning_names)

    self.prune_times = 0
    self.one_rate = (1 - (self.final_rate**(1.0 / self.pruning_iter))
                     if self.pruning_iter > 0 else 1.0)
    self.prune_once = partial(prune_once, self.one_rate)
    self._log.info(
        "Pruning iter {}, pruning once percent {}, final remain rate {}".format(
            self.pruning_iter, self.one_rate, self.final_rate))

    # backup initial weights
    # self.backup_optim = copy(self.optimizer)
    self.backup_weights = deepcopy(model.state_dict())
    if hasattr(model, "module"):
        self._model = model.module
    else:
        self._model = model
    self._log.debug("model params: {}".format(
        [name for name, _ in self._model.named_parameters()]))

    remain_mask = {
        name: torch.zeros(p.size()).to(p).bool()
        for name, p in self._model.named_parameters()
        if name in self.pruning_names
    }
    self.remain_mask = remain_mask
    self.pruning_names = set(self.remain_mask.keys())

    self._log.info("Pruning params are in following ...")
    total_m = 0
    for name, p in self.remain_mask.items():
        self._log.info("Need pruning {}, params: {}".format(name, p.numel()))
        total_m += p.numel()
    self._log.info("Total need pruning params: {}".format(total_m))
    self.total_params = total_m
    self.cur_rate = 100.0
    self.last_cutoff = 0

def __init__(self):
    self.engine = create_engine(ENGINE)
    self.meta = MetaData(self.engine)
    self.table = Table('patentss', self.meta,
                       Column('id', String, primary_key=True),
                       Column('registration_date', Date),
                       Column('receipt_date', Date),
                       Column('full_name', String),
                       Column('type', String),
                       Column('name_of_work', String),
                       Column('work_creation_date', Date),
                       Column('status', String))
    self.logger = get_logger('Saver')
    if not self.engine.has_table("patentss"):
        self.table.create()

def create_features(self):
    logger = get_logger(self.__class__.__name__)
    with timer(logger, 'loading data'):
        train = pd.read_csv(DATA_DIR / TRAIN_NAME)
        test = pd.read_csv(DATA_DIR / TEST_NAME)

    with timer(logger, 'preprocessing'):
        train = preprocessing(train)
        test = preprocessing(test)

    with timer(logger, 'get numeric features'):
        for col in numeric_features:
            self.train_feature[col] = train[col]
            self.test_feature[col] = test[col]

def __init__(self, config: dict):
    self.config = config

    log_dir = Path(config["log_dir"])
    log_dir.mkdir(exist_ok=True, parents=True)

    config_name = Path(config["config_path"]).name.replace(".yml", "")
    log_dir = log_dir / config_name
    log_dir.mkdir(parents=True, exist_ok=True)

    self.init_time = dt.datetime.now().strftime("%Y%m%d-%H:%M:%S")
    log_name = self.init_time + ".log"
    logger = utils.get_logger(str(log_dir / log_name))
    self.state = State(config, logger)

def __init__(self, bind_host: str, port_pool: list, logger_verbose: bool = False):
    self.bind_host = bind_host
    self.port_pool = port_pool
    self.port_occupied = None

    self.recv_bytes_data = b""
    self.file_overall_checksum = None
    self.file_name = None
    self.last_data_packet_seq = None
    self.last_data_packet_data_size = None

    self.client_socket = None
    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    self.logger = get_logger("ZTransferTCPServer", logger_verbose)
    self.logger.debug(f"Constructed ZTransferTCPServer({bind_host}, {port_pool})")

def dataset_build(config_path: Text):
    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('dataset_build')
    logger = get_logger(name='BUILD_DATASET',
                        loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug('Prepare dataset')

    dataset = get_dataset(config['dataset_csv'])

    logger.debug('Extracting features')
    featured_dataset = extract_features(dataset)

    filepath = config['featured_dataset_csv']
    featured_dataset.to_csv(filepath, index=False)
    logger.debug(f'Dataset saved to {filepath}')

def job(devices, resume, save_interval):
    global params

    mode_str = 'train'
    setting = ''

    exp_path = ROOT + f'exp/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')
    log_file = open(ROOT + f'exp/{params["ex_name"]}/log', 'a+')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )
    data_loaders = data_utils.make_data_loaders(
        data_root=params['data_root'],
        train_csv=params['train_csv'],
        val_csv=params['val_csv'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        size=(params['image_size'], params['image_size']),
        batch_size=params['batch_size'],
        num_workers=8)

def __init__(self, server_host: str, server_port: int, port_pool: list,
             file_name: str, file_stream: io.BytesIO, logger_verbose: bool = False):
    self.server_addr = (server_host, server_port)
    self.port_pool = port_pool
    self.port_occupied = None

    self.file_name = file_name
    self.file_stream = file_stream
    self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    self.acked_packet_seqs = set()
    self.all_data_seqs = set()
    self.to_send_seqs = set()
    self.session_sent_seqs = set()
    self.failed_packet_count = 0

    self._server_disconnect_ctr = 0
    self._acks_got_up_to_timeout = 0
    self.old_drop_factor = 100
    self.window_size = WINDOW_SIZE_START
    self.__in_rapid_start = True
    self.__in_first_rtt = True
    self._updated_window_size_in_rtt = False
    self.buffer_memview = None

    self.logger = get_logger("ZTransferUDPClient", logger_verbose)
    self.logger.debug(
        f"Constructed ZTransferUDPClient({server_host}, {server_port}, ...)")
    self.logger.debug(f"WINDOW_SIZE: {self.window_size}")
    self.logger.debug(f"CREQ_TIMER_DURATION: {CREQ_TIMER_DURATION}")
    self.logger.debug(f"RAPID_RECV_TIMER_DURATION: {RAPID_RECV_TIMER_DURATION}")

def __init__(self, server_host: str, server_port: int, port_pool: list,
             file_name: str, file_stream: io.BytesIO, logger_verbose: bool = False):
    self.server_host = server_host
    self.server_port = server_port
    self.port_pool = port_pool
    self.port_occupied = None

    self.file_name = file_name
    self.file_stream = file_stream
    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    self.logger = get_logger("ZTransferTCPClient", logger_verbose)
    self.logger.debug(
        f"Constructed ZTransferTCPClient({server_host}, {server_port}, ...)")

def prune_by_percent_once(percent, mask, param):
    # Put the weights that aren't masked out in sorted order.
    sorted_weights = np.sort(np.abs(param[mask == 1]), axis=None)

    # Determine the cutoff for weights to be pruned.
    if sorted_weights.size <= 0:
        utils.get_logger(__name__).warning(
            "cutoff all of params, shape: {}".format(param.shape))
        utils.get_logger(__name__).warning(
            "last cutoff mask {}".format(np.sum(mask)))
        return np.zeros(mask.shape)

    cutoff_index = np.round(percent * sorted_weights.size).astype(int)
    cutoff = sorted_weights[cutoff_index]
    utils.get_logger(__name__).debug(
        "cutoff index {}, cutoff weights {}".format(cutoff_index, cutoff))

    # Prune all weights below the cutoff.
    return np.where(np.abs(param) <= cutoff, np.zeros(mask.shape), mask)

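# A minimal usage sketch for prune_by_percent_once, assuming it and a working
# utils.get_logger are importable alongside numpy. The weight shape and the 20%
# rate below are illustrative only, not values taken from the snippet above.
import numpy as np

param = np.random.randn(4, 4)        # hypothetical weight tensor
mask = np.ones_like(param)           # start with nothing pruned
new_mask = prune_by_percent_once(0.2, mask, param)  # prune ~20% of remaining weights
print("remaining weights:", int(new_mask.sum()), "of", mask.size)
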
elapsed_sec = elapsed_sec % 60
logger.info(f"Elapsed time: {elapsed_min}min {elapsed_sec:.4f}seconds.")


if __name__ == "__main__":
    warnings.filterwarnings("ignore")

    args = utils.get_parser().parse_args()
    config = utils.load_config(args.config)

    global_params = config["globals"]
    output_dir = Path(global_params["output_dir"])
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.get_logger(output_dir / "output.log")

    utils.set_seed(global_params["seed"])
    device = C.get_device(global_params["device"])

    df, datadir = C.get_metadata(config)
    splitter = C.get_split(config)
    calltype_labels = C.get_calltype_labels(df)

    if config["data"].get("event_level_labels") is not None:
        event_level_labels = C.get_event_level_labels(config)
    else:
        event_level_labels = None

    if "Multilabel" in config["split"]["name"]:

'''
Provides different strategies. To add a new one, just put it here.

params: pandas dataframe with ohlcv data
return: signal string, 'Buy', 'Sell', or 'Nothing'
'''
import src.utils as u

logger = u.get_logger(__name__)


class Strategy:
    def __init__(self):
        pass

    def MACD(self, df):
        '''
        Strategy function: reads data from an ohlcv array with length 50 and
        gives a long, short, or nothing signal.
        '''
        df = u.macd(df)
        logger.debug(
            f'DIF: {str(df.macd.values[-2])} - {str(df.macd.values[-1])}')
        logger.debug(
            f'DEA: {str(df.macd_signal.values[-2])} - {str(df.macd_signal.values[-1])}')
        logger.debug(
            f'BAR: {str(df.macd_diff.values[-2])} - {str(df.macd_diff.values[-1])}')
        if u.crossover(df.macd.values, df.macd_signal.values):
            return 'Buy'
        elif u.crossunder(df.macd.values, df.macd_signal.values):
            return 'Sell'
        return 'Nothing'

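# A minimal usage sketch, assuming src.utils provides macd/crossover/crossunder as
# used above and that MACD expects a ~50-row DataFrame with a 'close' column; the
# random data below is purely illustrative, not part of the snippet above.
import numpy as np
import pandas as pd

close = pd.Series(np.random.rand(50)).cumsum() + 100
ohlcv_df = pd.DataFrame({
    'open': close, 'high': close + 1, 'low': close - 1,
    'close': close, 'volume': 1000,
})

signal = Strategy().MACD(ohlcv_df)  # 'Buy', 'Sell', or 'Nothing'
logger.info(f'MACD signal: {signal}')
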
# Path for db
PATH_DB = Path().cwd() / "data" / "db.json"
PATH_DB.parent.mkdir(parents=True, exist_ok=True)

# Bot name - "<platform>:<name>:v<version> (by /u/<username>)"
USER_AGENT = ""

# Reddit client id/secret
HTTP_AUTH_LOGIN = ""
HTTP_AUTH_PASSWORD = ""

# Reddit login/password
REDDIT_LOGIN = ""
REDDIT_PASSWORD = ""

logger = get_logger("reddit_archiver", file_name="reddit_archiver.log")


@lru_cache()
def get_token(ttl_hash: int = None):
    """
    Authenticate with Reddit and receive token.

    :return: token.
    """
    del ttl_hash

    client_auth = requests.auth.HTTPBasicAuth(HTTP_AUTH_LOGIN, HTTP_AUTH_PASSWORD)
    post_data = {
        "grant_type": "password",

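# Why get_token accepts a throwaway ttl_hash: lru_cache keys the cache on the
# arguments, so passing a value that only changes every N seconds gives the cached
# token a rough time-to-live. A minimal sketch of that pattern; get_ttl_hash is a
# hypothetical helper, not part of the snippet above.
import time


def get_ttl_hash(seconds: int = 3600) -> int:
    """Return a value that stays constant for `seconds` seconds."""
    return round(time.time() / seconds)


# token = get_token(ttl_hash=get_ttl_hash())  # re-authenticates at most hourly
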
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import uuid
import base64

# Works in lambda environment
import src.utils as utils

logger = utils.get_logger()


class ApiGateway():
    def __init__(self, lambda_instance):
        self.lambda_instance = lambda_instance

    def is_post_request_with_body(self):
        return self.lambda_instance.event['httpMethod'] == 'POST'

    def is_post_request_with_body_json(self):
        return (self.lambda_instance.event['httpMethod'] == 'POST'
                and self.lambda_instance.event['headers']['Content-Type'].strip()
                == 'application/json')

    def is_request_with_parameters(self):

def initialize(self):
    self.logger = get_logger('auth')

    default=0,
    help='start loop number, used to calculate first epoch number')
parser.add_argument(
    '--end',
    type=int,
    default=18,
    help='end loop number, used to calculate last epoch number')

args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # logger
    args.outputs_dir = os.path.join(
        args.ckpt_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.ckpt_name != "":
        args.start = 0
        args.end = 1

    for loop in range(args.start, args.end, 1):
        network = CenterfaceMobilev2()
        default_recurisive_init(network)

        if args.ckpt_name == "":
            ckpt_num = loop * args.device_num + args.rank + 1
            ckpt_name = "0-" + str(ckpt_num) + "_" + str(
                args.steps_per_epoch * ckpt_num) + ".ckpt"
        else:

def initialize(self):
    self.connection = get_mongodb_connection()
    self._db = self.connection[options.auth_db]
    self.logger = get_logger('auth')
    self.auth_driver = get_driver(options.auth_driver)

import collections
from datetime import datetime as dt

from src.adapters import BaseAdapter
from src.utils import get_logger

LOGGER = get_logger(__name__)


class LStreamingAdapter(BaseAdapter):
    def __init__(self):
        BaseAdapter.__init__(self)
        self.__figure = None

    @property
    def data(self):
        return self.__data

    def update_data(self):
        content = self.get_content()
        updated = dt.now()
        self.__data = {
            'content': content,
            'last_update': updated.isoformat(),
            'figure': self.get_figure()
        }

    def get_figure(self):
        if self.__figure is None:
            return self.__get_figure_updated()

# from test_tube import Experiment
from pathlib import Path
from typing import List

import hydra
import numpy as np
import pandas as pd
import torch
from omegaconf import DictConfig
from pytorch_lightning import Callback, LightningDataModule, LightningModule, Trainer
from pytorch_lightning.loggers import LightningLoggerBase

from src.utils import get_logger
from src.utils.metrics import purity

log = get_logger(__name__)


def train_model(config: DictConfig):
    """
    Training module for clustering of event sequences
    """
    np.set_printoptions(threshold=10000)
    torch.set_printoptions(threshold=10000)
    default_save_dir = config.save_dir

    # Init and prepare lightning datamodule
    log.info(f"Instantiating datamodule <{config.datamodule._target_}>")
    dm: LightningDataModule = hydra.utils.instantiate(config.datamodule)
    dm.prepare_data()

    for i in range(config.n_runs):

args = get_args()
if args.apex:
    from apex import amp

EXP_ID = os.path.splitext(os.path.basename(args.config))[0]
SEED = 42 + 1
DEVICE = "cuda"

import json
with open('SETTINGS.json', 'r') as setting_file:
    setting_json = json.load(setting_file)

output_dir = Path(setting_json["OUTPUT"]) / EXP_ID
output_dir.mkdir(exist_ok=True, parents=True)
_logger = get_logger(output_dir / f"fold{args.fold}_output.log")


def log(msg):
    _logger.info(msg)


def log_w(msg):
    _logger.warning(msg)


log(f'EXP {EXP_ID} start')


def main():
    config = utils.load_config(args.config)

except:
    os.system('pip install lightgbm')

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

from src.utils import get_logger

LOGGER = get_logger('GLOBAL')

current_path = os.path.abspath(__file__)
config_file_path = os.path.abspath(
    os.path.join(os.path.dirname(current_path), 'config.json'))

import json
config = json.load(open(config_file_path, 'r'))

from src.meta.multiprocess import multiprocess
from src.meta.proto import prototype_multi

if config['epochs'] == 'auto':
    config['epochs'] = prototype_multi.MAX_LOCAL_EPOCH

from statistics import mean

import cv2

from args import get_args
from src.label import Shape, writeShapes
from src.tflite_utils import load_model, detect_lp
from src.utils import (im2single, get_model_memory_usage, get_logger,
                       setup_dirs, get_gzipped_model_size)


def adjust_pts(pts, lroi):
    return pts * lroi.wh().reshape((2, 1)) + lroi.tl().reshape((2, 1))


setup_dirs()
logger = get_logger("lp-tflite-detection")
args = get_args()

if __name__ == '__main__':
    args = get_args()

    if args.use_colab:
        from google.colab import drive
        drive.mount('/content/gdrive')
        OUTPUT_DIR = '/content/gdrive/My Drive/lpd/{}_{}_{}_{}_{}'.format(
            args.image_size, args.epochs, args.prune_model,
            args.initial_sparsity, args.final_sparsity)
        if not os.path.isdir(OUTPUT_DIR):
            os.makedirs(OUTPUT_DIR)

        tflite_path = '{}/{}.tflite'.format(OUTPUT_DIR, args.model)
        pruned_tflite_path = '{}/{}_pruned.tflite'.format(OUTPUT_DIR, args.model)

def job(tuning, params_path, devices, resume, save_interval):
    global params

    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}' for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    # Change the seed together with the parameters (hoping for a seed-averaging effect)
    seed = sum(ord(_) for _ in str(params.values()))
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )
    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='S',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=3e-6)
    start_epoch = 0

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}')

        # ============================== train ============================== #
        model.train(True)
        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None, ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}')

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    model = model.module
    datasets = ('roxford5k', 'rparis6k')
    results = eval_datasets(model, datasets=datasets, ms=False,
                            tta_gem_p=1.0, logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            for key in ['mapE', 'mapM', 'mapH']:
                mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')

def __init__(self):
    self.logger = get_logger('auth')
