Example #1
def train_model(config_path: Text):

    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('train')

    logger = get_logger(name='TRAIN MODEL', loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug(f'Start training...')

    estimator_name = config['estimator_name']
    param_grid = config['estimators'][estimator_name]['param_grid']
    cv = config['cv']

    target_column = pipeline_config['dataset_build']['target_column']
    train_df = get_dataset(pipeline_config['split_train_test']['train_csv'])

    model = train(
        df=train_df,
        target_column=target_column,
        estimator_name=estimator_name,
        param_grid=param_grid,
        cv=cv
    )
    logger.debug(f'Best score: {model.best_score_}')

    model_name = pipeline_config['base']['experiments']['model_name']
    models_folder = pipeline_config['base']['experiments']['models_folder']

    joblib.dump(
        model,
        os.path.join(models_folder, model_name)
    )
    logger.debug(f'Save model to {os.path.join(models_folder, model_name)}')
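For reference, the `pipeline_config` dict returned by `yaml.load` in this script (and in the related stage scripts in Examples #6 and #15) is accessed through a fixed set of keys. A minimal sketch of that structure, with purely illustrative placeholder values, could look like this:

# Hypothetical shape of the parsed pipeline config; only the key names are
# taken from the code above, every value below is a placeholder.
pipeline_config = {
    'base': {
        'loglevel': 'DEBUG',
        'random_state': 42,
        'experiments': {
            'model_name': 'model.joblib',
            'models_folder': 'models',
        },
    },
    'dataset_build': {
        'dataset_csv': 'data/raw/dataset.csv',
        'featured_dataset_csv': 'data/processed/featured.csv',
        'target_column': 'target',
    },
    'split_train_test': {
        'test_size': 0.2,
        'train_csv': 'data/processed/train.csv',
        'test_csv': 'data/processed/test.csv',
    },
    'train': {
        'cv': 5,
        'estimator_name': 'logreg',
        'estimators': {
            'logreg': {'param_grid': {'C': [0.1, 1.0, 10.0]}},
        },
    },
}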
Example #2
    def __init__(self, config):
        # Configurations
        self.config = config
        self.tables_config = self.config.get("tables", dict())
        # Sleeper configurations
        self.sleeper_base_url = self.config.get("sleeper_base_url")
        # ESPN configurations
        self.espn_start_year = self.config.get("espn_start_year", dict())
        self.espn_end_year = self.config.get("espn_end_year", dict())
        # Command line arguments
        self.config = config
        self.args = parse_args(self.config["args"])
        self.league_name = self.args["league_name"]
        self.sleeper_season_id = self.args["sleeper_season_id"]
        self.gcs_bucket = self.args["gcs_bucket"]
        self.gbq_project = self.args["gbq_project"]
        self.gbq_dataset = self.args["gbq_dataset"]
        self.espn_league_id = self.args.get("espn_league_id")
        self.espn_s2 = self.args.get("espn_s2")
        self.espn_swid = self.args.get("espn_swid")
        self.logger = get_logger("Fantasy Football Stats",
                                 level=self.args["log_level"])
        # Create GCP clients
        self.gcs_client = storage.Client()
        self.gbq_client = bigquery.Client()
Example #3
    def create_features(self):
        logger = get_logger(self.__class__.__name__)

        with timer(logger, 'loading data'):
            train = pd.read_csv(DATA_DIR / TRAIN_NAME)
            test = pd.read_csv(DATA_DIR / TEST_NAME)
            spectrum_stack = pd.read_csv(DATA_DIR / "spectrum_stack.csv")

        with timer(logger, 'get intensity features'):
            clf = TSNE(n_components=3)
            X = spectrum_stack.drop(columns=["spectrum_filename"]).values
            print(X.shape)
            z = clf.fit_transform(X)

            result = pd.DataFrame(
                z, columns=[f"tsne_{i}" for i in range(z.shape[1])])
            fe_cols = result.columns
            result["spectrum_filename"] = spectrum_stack[
                "spectrum_filename"].values

            train = pd.merge(train, result, on="spectrum_filename", how="left")
            test = pd.merge(test, result, on="spectrum_filename", how="left")

            self.train_feature = train[fe_cols]
            self.test_feature = test[fe_cols]
Example #4
    def create_features(self):
        logger = get_logger(self.__class__.__name__)

        with timer(logger, 'loading data'):
            train = pd.read_csv(DATA_DIR / TRAIN_NAME)
            test = pd.read_csv(DATA_DIR / TEST_NAME)
            spectrum = pd.read_csv(DATA_DIR / "spectrum.csv")

        with timer(logger, 'get intensity features'):
            spectrum["intensity_diff"] = np.abs(
                spectrum.groupby(["spectrum_filename"
                                  ])["intensity"].diff().values)
            agg = spectrum.groupby("spectrum_filename")["intensity_diff"].agg(
                STATS)
            agg_cols = agg.columns
            train = pd.merge(train,
                             agg.reset_index(),
                             on="spectrum_filename",
                             how="left")
            test = pd.merge(test,
                            agg.reset_index(),
                            on="spectrum_filename",
                            how="left")

            self.train_feature = train[agg_cols]
            self.test_feature = test[agg_cols]
Example #5
class FundsExplorerSpider(scrapy.Spider):

    name = 'Funds Explorer Spider'
    logger = get_logger('Funds Explorer')

    allowed_domains = ['fundsexplorer.com.br']
    start_urls = [
        'https://fundsexplorer.com.br/ranking'
    ]

    custom_settings = {
        'ITEM_PIPELINES': {
            'src.scrawlers.pipelines.JsonWriterPipeline': 900
        }
    }

    def parse(self, response):
        self.logger.info('Response arrived!')

        rows = response.xpath('//*[@id="table-ranking"]//tbody//tr')

        self.logger.info(f'Found {len(rows)} rows')

        for row in rows:

            item = FIIs()
            item['code'] = row.xpath('td[1]/a/text()').get()

            yield item
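To try a spider like this outside a full Scrapy project, it can be driven with Scrapy's CrawlerProcess. This is only a sketch; it assumes the FIIs item class and the src.scrawlers.pipelines.JsonWriterPipeline pipeline referenced above are importable:

# Hypothetical standalone runner for the spider above (assumes the project's
# modules are on the PYTHONPATH).
from scrapy.crawler import CrawlerProcess

if __name__ == '__main__':
    process = CrawlerProcess(settings={'LOG_LEVEL': 'INFO'})
    process.crawl(FundsExplorerSpider)
    process.start()  # blocks until the crawl finishes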
Example #6
def split_dataset(config_path: Text):

    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('split_train_test')

    logger = get_logger(name='SPLIT_TRAIN_TEST',
                        loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug(f'Start split_dataset')

    dataset = get_dataset(pipeline_config['dataset_build']['dataset_csv'])
    target_column = pipeline_config['dataset_build']['target_column']
    random_state = pipeline_config['base']['random_state']

    test_size = config['test_size']
    train_csv_path = config['train_csv']
    test_csv_path = config['test_csv']
    logger.debug(f'Test size: {test_size}')

    dataset = transform_targets_to_numerics(dataset,
                                            target_column=target_column)
    train_dataset, test_dataset = split_dataset_in_train_test(
        dataset, test_size=test_size, random_state=random_state)

    train_dataset.to_csv(train_csv_path, index=False)
    test_dataset.to_csv(test_csv_path, index=False)

    logger.debug(f'Train dataset shape: {train_dataset.shape}')
    logger.debug(f'Save train dataset to {train_csv_path}')
    logger.debug(f'Test dataset shape: {test_dataset.shape}')
    logger.debug(f'Save test dataset to {test_csv_path}')
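The helpers transform_targets_to_numerics and split_dataset_in_train_test are not shown in this example. A minimal sketch of what they might look like, consistent with how they are called above but purely an assumption:

# Hypothetical implementations; the project's real helpers are not shown here.
from sklearn.model_selection import train_test_split


def transform_targets_to_numerics(df, target_column):
    df = df.copy()
    df[target_column] = df[target_column].astype('category').cat.codes
    return df


def split_dataset_in_train_test(df, test_size, random_state):
    return train_test_split(df, test_size=test_size, random_state=random_state)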
Example #7
    def __init__(self, quantity):
        self.headers = HEADERS
        self.scroll_header = SCROLL_HEADERS
        self.params = PARAMS
        self.view_id = None
        self.session_id = None
        self.data = None
        self.quantity = quantity
        self.logger = get_logger('KazPatentCrawler')
Example #8
def launch_qsub(job_type,
                mode, n_iter, n_gpu, devices, save_interval,  # tuning args
                model_path, ms, scale, batch_size, splits,  # predict args
                n_blocks, instance_type
                ):
    """
    python v17.py launch-qsub predict -m v17/ep --ms --scale L --batch-size 26 --splits index,test --n-blocks 32
    python v7c.py launch-qsub tuning -d 0,1,2,3 --n-gpu 4 --n-blocks 2 -s 1 --instance-type rt_F
    """
    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    logger = utils.get_logger(log_dir=exp_path)
    job_ids = []
    for block_id in range(n_blocks):
        if job_type == 'tuning':
            cmd_with_args = [
                "python", "-W", "ignore", __file__, "tuning",
                "--mode", mode,
                "--n-iter", str(n_iter),
                "--n-gpu", str(n_gpu),
                "--devices", devices,
                "--save-interval", str(save_interval),
                "--n-blocks", str(n_blocks),
                "--block-id", str(block_id),
            ]
            n_hours = 72
        elif job_type == 'predict':
            cmd_with_args = [
                "python", "-W", "ignore", __file__, "predict",
                "-m", model_path,
                "--splits", splits,
                "--scale", scale,
                "--batch-size", str(batch_size),
                "--n-blocks", str(n_blocks),
                "--block-id", str(block_id),
            ]
            n_hours = 18
            if ms:
                cmd_with_args.append("--ms")
        else:
            raise ValueError('job-type should be one of "tuning" or "predict"')
        proc = qsub.qsub(cmd_with_args,
                         n_hours=n_hours,
                         instance_type=instance_type,
                         logger=logger)
        logger.info(f'Response from qsub: {proc.returncode}')

        m = re.match(r'Your job (\d+) \(', proc.stdout.decode('utf8'))
        job_id = int(m.group(1)) if m is not None else None
        logger.info(f'Job id: {job_id}')
        assert job_id is not None
        job_ids.append(job_id)
        time.sleep(1)

    qsub.monitor_jobs(job_ids, logger)
Example #9
    def create_features(self):
        logger = get_logger(__name__)

        with timer(logger, 'loading data'):
            train = pd.read_csv(DATA_DIR / TRAIN_NAME)
            test = pd.read_csv(DATA_DIR / TEST_NAME)

        with timer(logger, 'label encoding'):
            for col in categorical_features:
                train_result, test_result = label_encoding(col, train, test)
                self.train_feature[col] = train_result
                self.test_feature[col] = test_result
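The label_encoding helper is not shown either; a sketch consistent with the call above (an assumption, not the project's actual code):

# Hypothetical label_encoding helper; fit on train+test so unseen test labels
# do not raise, then return the two encoded columns.
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def label_encoding(col, train, test):
    le = LabelEncoder()
    le.fit(pd.concat([train[col], test[col]]).astype(str))
    return le.transform(train[col].astype(str)), le.transform(test[col].astype(str))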
Example #10
    def __init__(
        self,
        model,
        pruning_param_names,
        final_rate=0.1,
        pruning_iter=1,
        prune_once=None,
    ):
        super().__init__()
        assert pruning_iter >= 0
        self.final_rate = final_rate
        self.pruning_iter = pruning_iter
        prune_once = prune_once or prune_by_percent_once
        self.pruning_names = set(pruning_param_names)
        print(self.pruning_names)
        self._log = utils.get_logger(__name__)
        self._log.info(self.pruning_names)
        self.prune_times = 0
        self.one_rate = (1 - (self.final_rate**(1.0 / self.pruning_iter))
                         if self.pruning_iter > 0 else 1.0)
        self.prune_once = partial(prune_once, self.one_rate)
        self._log.info(
            "Pruning iter {}, pruning once percent {}, final remain rate {}".
            format(self.pruning_iter, self.one_rate, self.final_rate))

        # backup initial weights
        # self.backup_optim = copy(self.optimizer)
        self.backup_weights = deepcopy(model.state_dict())
        if hasattr(model, "module"):
            self._model = model.module
        else:
            self._model = model
        self._log.debug("model params :{}".format(
            [name for name, _ in self._model.named_parameters()]))
        remain_mask = {
            name: torch.zeros(p.size()).to(p).bool()
            for name, p in self._model.named_parameters()
            if name in self.pruning_names
        }
        self.remain_mask = remain_mask
        self.pruning_names = set(self.remain_mask.keys())

        self._log.info("Pruning params are in following ...")
        total_m = 0
        for name, p in self.remain_mask.items():
            self._log.info("Need pruning {}, params: {}".format(
                name, p.numel()))
            total_m += p.numel()
        self._log.info("Total need pruning params: {}".format(total_m))
        self.total_params = total_m
        self.cur_rate = 100.0
        self.last_cutoff = 0
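The one_rate formula above picks a per-round pruning fraction so that after pruning_iter rounds roughly final_rate of the weights remain. A quick check of that arithmetic (not part of the original code):

# With final_rate=0.1 and pruning_iter=3:
final_rate, pruning_iter = 0.1, 3
one_rate = 1 - final_rate ** (1.0 / pruning_iter)  # ~0.536 pruned per round
remaining = (1 - one_rate) ** pruning_iter         # (0.1 ** (1/3)) ** 3 == 0.1
print(one_rate, remaining)                         # ~0.5358 0.1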
Example #11
    def __init__(self):
        self.engine = create_engine(ENGINE)
        self.meta = MetaData(self.engine)
        self.table = Table('patentss', self.meta,
                           Column('id', String, primary_key=True),
                           Column('registration_date', Date),
                           Column('receipt_date', Date),
                           Column('full_name', String), Column('type', String),
                           Column('name_of_work', String),
                           Column('work_creation_date', Date),
                           Column('status', String))
        self.logger = get_logger('Saver')

        if not self.engine.has_table("patentss"):
            self.table.create()
Example #12
    def create_features(self):
        logger = get_logger(self.__class__.__name__)

        with timer(logger, 'loading data'):
            train = pd.read_csv(DATA_DIR / TRAIN_NAME)
            test = pd.read_csv(DATA_DIR / TEST_NAME)

        with timer(logger, 'preprocessing'):
            train = preprocessing(train)
            test = preprocessing(test)

        with timer(logger, 'get numeric features'):
            for col in numeric_features:
                self.train_feature[col] = train[col]
                self.test_feature[col] = test[col]
Example #13
    def __init__(self, config: dict):
        self.config = config

        log_dir = Path(config["log_dir"])
        log_dir.mkdir(exist_ok=True, parents=True)

        config_name = Path(config["config_path"]).name.replace(".yml", "")
        log_dir = log_dir / config_name
        log_dir.mkdir(parents=True, exist_ok=True)

        self.init_time = dt.datetime.now().strftime("%Y%m%d-%H:%M:%S")
        log_name = self.init_time + ".log"

        logger = utils.get_logger(str(log_dir / log_name))

        self.state = State(config, logger)
Example #14
    def __init__(self, bind_host: str, port_pool: list, logger_verbose: bool = False):
        self.bind_host = bind_host
        self.port_pool = port_pool
        self.port_occupied = None
        
        self.recv_bytes_data = b""
        self.file_overall_checksum = None
        self.file_name = None
        self.last_data_packet_seq = None
        self.last_data_packet_data_size = None

        self.client_socket = None
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        self.logger = get_logger("ZTransferTCPServer", logger_verbose)
            
        self.logger.debug(f"Constructed ZTransferTCPServer({bind_host}, {port_pool})")
Example #15
def dataset_build(config_path: Text):

    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('dataset_build')

    logger = get_logger(name='BUILD_DATASET',
                        loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug(f'Prepare dataset')

    dataset = get_dataset(config['dataset_csv'])

    logger.debug(f'Extracting features')
    featured_dataset = extract_features(dataset)

    filepath = config['featured_dataset_csv']
    featured_dataset.to_csv(filepath, index=False)
    logger.debug(f'Dataset saved to {filepath}')
Example #16
def job(devices, resume, save_interval):
    global params

    mode_str = 'train'
    setting = ''

    exp_path = ROOT + f'exp/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(log_dir=exp_path + f'{mode_str}/log/{setting}',
                                      tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')
    log_file = open(ROOT + f'exp/{params["ex_name"]}/log', 'a+')


    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )



    data_loaders = data_utils.make_data_loaders(
        data_root=params['data_root'],
        train_csv=params['train_csv'],
        val_csv=params['val_csv'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        size=(params['image_size'], params['image_size']),
        batch_size=params['batch_size'],
        num_workers=8)
Example #17
    def __init__(self,
                 server_host: str,
                 server_port: int,
                 port_pool: list,
                 file_name: str,
                 file_stream: io.BytesIO,
                 logger_verbose: bool = False):
        self.server_addr = (server_host, server_port)
        self.port_pool = port_pool
        self.port_occupied = None

        self.file_name = file_name
        self.file_stream = file_stream
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

        self.acked_packet_seqs = set()
        self.all_data_seqs = set()
        self.to_send_seqs = set()
        self.session_sent_seqs = set()

        self.failed_packet_count = 0
        self._server_disconnect_ctr = 0
        self._acks_got_up_to_timeout = 0

        self.old_drop_factor = 100
        self.window_size = WINDOW_SIZE_START
        self.__in_rapid_start = True
        self.__in_first_rtt = True
        self._updated_window_size_in_rtt = False

        self.buffer_memview = None

        self.logger = get_logger("ZTransferUDPClient", logger_verbose)
        self.logger.debug(
            f"Constructed ZTransferUDPClient({server_host}, {server_port}, ...)"
        )

        self.logger.debug(f"WINDOW_SIZE: {self.window_size}")
        self.logger.debug(f"CREQ_TIMER_DURATION: {CREQ_TIMER_DURATION}")
        self.logger.debug(
            f"RAPID_RECV_TIMER_DURATION: {RAPID_RECV_TIMER_DURATION}")
Example #18
    def __init__(self,
                 server_host: str,
                 server_port: int,
                 port_pool: list,
                 file_name: str,
                 file_stream: io.BytesIO,
                 logger_verbose: bool = False):
        self.server_host = server_host
        self.server_port = server_port
        self.port_pool = port_pool
        self.port_occupied = None

        self.file_name = file_name
        self.file_stream = file_stream
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        self.logger = get_logger("ZTransferTCPClient", logger_verbose)

        self.logger.debug(
            f"Constructed ZTransferTCPClient({server_host}, {server_port}, ...)"
        )
Example #19
def prune_by_percent_once(percent, mask, param):
    # Put the weights that aren't masked out in sorted order.
    sorted_weights = np.sort(np.abs(param[mask == 1]), axis=None)

    # Determine the cutoff for weights to be pruned.
    if sorted_weights.size <= 0:
        utils.get_logger(__name__).warning(
            "cutoff all of params, shape: {}".format(param.shape))
        utils.get_logger(__name__).warning("last cutoff mask {}".format(
            np.sum(mask)))
        # print('cut all of params')
        return np.zeros(mask.shape)

    cutoff_index = np.round(percent * sorted_weights.size).astype(int)
    cutoff = sorted_weights[cutoff_index]
    utils.get_logger(__name__).debug(
        "cutoff index {}, cutoff weights {}".format(cutoff_index, cutoff))
    # Prune all weights below the cutoff.
    return np.where(np.abs(param) <= cutoff, np.zeros(mask.shape), mask)
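A quick toy illustration of prune_by_percent_once (values invented; it assumes the call runs inside the module above so its utils logger import resolves):

import numpy as np

param = np.array([0.05, -0.8, 0.3, -0.02, 0.6, 0.1])
mask = np.ones_like(param)              # every weight currently active

new_mask = prune_by_percent_once(0.5, mask, param)
print(new_mask)                         # -> [0. 1. 0. 0. 1. 0.]  (|w| <= cutoff masked out)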
Example #20
        elapsed_sec = elapsed_sec % 60
        logger.info(
            f"Elapsed time: {elapsed_min}min {elapsed_sec:.4f}seconds.")


if __name__ == "__main__":
    warnings.filterwarnings("ignore")

    args = utils.get_parser().parse_args()
    config = utils.load_config(args.config)

    global_params = config["globals"]

    output_dir = Path(global_params["output_dir"])
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.get_logger(output_dir / "output.log")

    utils.set_seed(global_params["seed"])
    device = C.get_device(global_params["device"])

    df, datadir = C.get_metadata(config)
    splitter = C.get_split(config)

    calltype_labels = C.get_calltype_labels(df)

    if config["data"].get("event_level_labels") is not None:
        event_level_labels = C.get_event_level_labels(config)
    else:
        event_level_labels = None

    if "Multilabel" in config["split"]["name"]:
Example #21
'''
Provides different strategies. To add a new one, just put it here.
params: pandas DataFrame with OHLCV data
return: signal string: 'Buy', 'Sell', or 'Nothing'
'''
import src.utils as u

logger = u.get_logger(__name__)


class Strategy:
    def __init__(self):
        pass

    def MACD(self, df):
        '''
        Strategy function: reads data from an OHLCV array of length 50 and gives a long, short, or nothing signal.
        '''
        df = u.macd(df)
        logger.debug(
            f'DIF: {str(df.macd.values[-2])} - {str(df.macd.values[-1])}')
        logger.debug(
            f'DEA: {str(df.macd_signal.values[-2])} - {str(df.macd_signal.values[-1])}'
        )
        logger.debug(
            f'BAR: {str(df.macd_diff.values[-2])} - {str(df.macd_diff.values[-1])}'
        )
        if u.crossover(df.macd.values, df.macd_signal.values):
            return 'Buy'
        elif u.crossunder(df.macd.values, df.macd_signal.values):
            return 'Sell'
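The u.crossover and u.crossunder helpers are not shown; a typical definition consistent with how they are called on the last two bars would be (an assumption, not the project's actual code):

# Hypothetical crossover helpers operating on the last two values of each series.
def crossover(a, b):
    return a[-2] <= b[-2] and a[-1] > b[-1]


def crossunder(a, b):
    return a[-2] >= b[-2] and a[-1] < b[-1]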
Example #22
    # Path for db
    PATH_DB = Path.cwd() / "data" / "db.json"
    PATH_DB.parent.mkdir(parents=True, exist_ok=True)

    # Bot name - "<platform>:<name>:v<version> (by /u/<username>)"
    USER_AGENT = ""

    # Reddit client id/secret
    HTTP_AUTH_LOGIN = ""
    HTTP_AUTH_PASSWORD = ""

    # Reddit login/password
    REDDIT_LOGIN = ""
    REDDIT_PASSWORD = ""

logger = get_logger("reddit_archiver", file_name="reddit_archiver.log")


@lru_cache()
def get_token(ttl_hash: int = None):
    """
    Authenticate with Reddit and receive token.

    :return: token.
    """
    del ttl_hash

    client_auth = requests.auth.HTTPBasicAuth(HTTP_AUTH_LOGIN,
                                              HTTP_AUTH_PASSWORD)
    post_data = {
        "grant_type": "password",
Example #23
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import uuid
import base64
# Works in lambda environment
import src.utils as utils
logger = utils.get_logger()


class ApiGateway():
    def __init__(self, lambda_instance):
        self.lambda_instance = lambda_instance

    def is_post_request_with_body(self):
        return self.lambda_instance.event['httpMethod'] == 'POST'

    def is_post_request_with_body_json(self):
        return self.lambda_instance.event[
            'httpMethod'] == 'POST' and self.lambda_instance.event['headers'][
                'Content-Type'].strip() == 'application/json'

    def is_request_with_parameters(self):
Example #24
    def initialize(self):
        self.logger = get_logger('auth')
Example #25
    default=0,
    help='start loop number, used to calculate first epoch number')
parser.add_argument(
    '--end',
    type=int,
    default=18,
    help='end loop number, used to calculate last epoch number')

args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # logger
    args.outputs_dir = os.path.join(
        args.ckpt_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.ckpt_name != "":
        args.start = 0
        args.end = 1

    for loop in range(args.start, args.end, 1):
        network = CenterfaceMobilev2()
        default_recurisive_init(network)

        if args.ckpt_name == "":
            ckpt_num = loop * args.device_num + args.rank + 1
            ckpt_name = "0-" + str(ckpt_num) + "_" + str(
                args.steps_per_epoch * ckpt_num) + ".ckpt"
        else:
Example #26
    def initialize(self):
        self.connection = get_mongodb_connection()
        self._db = self.connection[options.auth_db]
        self.logger = get_logger('auth')
        self.auth_driver = get_driver(options.auth_driver)
Example #27
import collections
from datetime import datetime as dt
from src.adapters import BaseAdapter
from src.utils import get_logger


LOGGER = get_logger(__name__)


class LStreamingAdapter(BaseAdapter):
    def __init__(self):
        BaseAdapter.__init__(self)
        self.__figure = None

    @property
    def data(self):
        return self.__data

    def update_data(self):
        content = self.get_content()
        updated = dt.now()

        self.__data = {
            'content': content,
            'last_update': updated.isoformat(),
            'figure': self.get_figure()
        }

    def get_figure(self):
        if self.__figure is None:
            return self.__get_figure_updated()
Example #28
# from test_tube import Experiment
from pathlib import Path
from typing import List

import hydra
import numpy as np
import pandas as pd
import torch
from omegaconf import DictConfig
from pytorch_lightning import Callback, LightningDataModule, LightningModule, Trainer
from pytorch_lightning.loggers import LightningLoggerBase

from src.utils import get_logger
from src.utils.metrics import purity

log = get_logger(__name__)


def train_model(config: DictConfig):
    """
    Training module for clustering of event sequences
    """
    np.set_printoptions(threshold=10000)
    torch.set_printoptions(threshold=10000)
    default_save_dir = config.save_dir
    # Init and prepare lightning datamodule
    log.info(f"Instantiating datamodule <{config.datamodule._target_}>")
    dm: LightningDataModule = hydra.utils.instantiate(config.datamodule)
    dm.prepare_data()

    for i in range(config.n_runs):
Example #29
args = get_args()
if args.apex:
    from apex import amp

EXP_ID = os.path.splitext(os.path.basename(args.config))[0]
SEED = 42 + 1
DEVICE = "cuda"

import json
with open('SETTINGS.json', 'r') as f:
    setting_json = json.load(f)

output_dir = Path(setting_json["OUTPUT"]) / EXP_ID
output_dir.mkdir(exist_ok=True, parents=True)
_logger = get_logger(output_dir / f"fold{args.fold}_output.log")


def log(msg):
    _logger.info(msg)


def log_w(msg):
    _logger.warning(msg)


log(f'EXP {EXP_ID} start')


def main():
    config = utils.load_config(args.config)
Example #30
except:
    os.system('pip install lightgbm')

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

from src.utils import get_logger

LOGGER = get_logger('GLOBAL')

current_path = os.path.abspath(__file__)
config_file_path = os.path.abspath(
    os.path.join(os.path.dirname(current_path), 'config.json'))
import json

config = json.load(open(config_file_path, 'r'))

from src.meta.multiprocess import multiprocess
from src.meta.proto import prototype_multi

if config['epochs'] == 'auto':
    config['epochs'] = prototype_multi.MAX_LOCAL_EPOCH

Example #31
import os
from statistics import mean

import cv2

from args import get_args
from src.label import Shape, writeShapes
from src.tflite_utils import load_model, detect_lp
from src.utils import im2single, get_model_memory_usage, get_logger, setup_dirs, get_gzipped_model_size


def adjust_pts(pts, lroi):
    return pts * lroi.wh().reshape((2, 1)) + lroi.tl().reshape((2, 1))


setup_dirs()
logger = get_logger("lp-tflite-detection")
args = get_args()

if __name__ == '__main__':
    args = get_args()
    if args.use_colab:
        from google.colab import drive

        drive.mount('/content/gdrive')
        OUTPUT_DIR = '/content/gdrive/My Drive/lpd/{}_{}_{}_{}_{}'.format(
            args.image_size, args.epochs, args.prune_model,
            args.initial_sparsity, args.final_sparsity)
        if not os.path.isdir(OUTPUT_DIR): os.makedirs(OUTPUT_DIR)
        tflite_path = '{}/{}.tflite'.format(OUTPUT_DIR, args.model)
        pruned_tflite_path = '{}/{}_pruned.tflite'.format(
            OUTPUT_DIR, args.model)
Example #32
def job(tuning, params_path, devices, resume, save_interval):
    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    # We also want to change the seed when the parameters change (hoping for a seed-averaging effect)
    seed = sum(ord(_) for _ in str(params.values()))
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='S',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=3e-6)
    start_epoch = 0

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, params['epochs']):

        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                )

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('roxford5k', 'rparis6k')
    results = eval_datasets(model,
                            datasets=datasets,
                            ms=False,
                            tta_gem_p=1.0,
                            logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            for key in ['mapE', 'mapM', 'mapH']:
                mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example #33
    def __init__(self):
        self.logger = get_logger('auth')
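None of the snippets above include the helper itself, and each project defines its own variant (name only, name plus level, a log file path, or even a (logger, writer) pair for TensorBoard). A minimal sketch of the common pattern behind these get_logger calls, offered purely as an assumption:

# Hypothetical get_logger; the real implementations in these projects differ.
import logging
import sys


def get_logger(name=__name__, loglevel="INFO", log_file=None):
    logger = logging.getLogger(name)
    logger.setLevel(loglevel)
    if not logger.handlers:  # avoid duplicate handlers on repeated calls
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
        if log_file is not None:
            file_handler = logging.FileHandler(log_file)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
    return logger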