Exemple #1
0
    def __init__(self):
        """Ensure the training-output directory exists and attach a logger."""
        out_dir = self.output_path
        # Create the directory for training artifacts on first use.
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Logger writes to the log path configured on this object.
        self.logger = get_logger(self.log_path)
Exemple #2
0
def config_from_args(args):
    """Build a Config from parsed CLI arguments and create its logger.

    Every attribute of *args* is copied onto a fresh Config.  Uses
    setattr() rather than writing to config.__dict__ directly, so any
    properties or __slots__ on Config are respected.

    Args:
        args: an argparse.Namespace (or any object with attributes).

    Returns:
        Tuple of (config, logger).
    """
    config = Config()
    for key, value in vars(args).items():
        setattr(config, key, value)
    # Derive any dependent settings (e.g. paths) from the raw values.
    config.auto_config()
    logger = get_logger(config.log_path)
    return config, logger
    def __init__(
        self,
        feature_embedding_list,
        feature_num,
        feature_weight_dropout_list,
        label2id,
        num_class,
        batch_size,
        epoch_num,
        max_patience,
        num_layers,
        rnn_unit,
        hidden_dim,
        dropout,
        optimizer,
        lr,
        clip,
        use_crf,
        output_path,
        is_attention,
        config,
    ):
        """Store data/model hyper-parameters, create output dirs, build model.

        Creates ``<output_path>/checkpoints/`` and ``<output_path>/results``,
        attaches a file logger at ``results/log.txt`` and finally calls
        ``self.build()`` to construct the model graph.
        """
        # Data
        self.feature_embedding_list = feature_embedding_list
        self.feature_num = feature_num
        self.weight_dropout_list = feature_weight_dropout_list
        self.label2id = label2id
        self.num_class = num_class
        # Model hyper-parameters
        self.batch_size = batch_size
        self.epoch_num = epoch_num
        self.max_patience = max_patience

        self.num_layers = num_layers  # number of BiLSTM layers
        self.rnn_unit = rnn_unit
        self.hidden_dim = hidden_dim

        self.dropout = dropout

        self.optimizer = optimizer
        self.lr = lr
        self.clip = clip

        self.use_crf = use_crf
        self.is_attention = is_attention
        # NOTE: attribute name kept as `outputpath` (not `output_path`)
        # because other methods of this class may read it.
        self.outputpath = output_path
        self.config = config

        # exist_ok=True replaces the original check-then-create pattern and
        # avoids its race condition without changing the end state.
        self.model_path = os.path.join(self.outputpath, "checkpoints/")
        os.makedirs(self.model_path, exist_ok=True)
        result_path = os.path.join(self.outputpath, "results")
        os.makedirs(result_path, exist_ok=True)
        log_path = os.path.join(result_path, "log.txt")
        self.logger = get_logger(log_path)
        # self.logger.info(self.config)

        self.build()
Exemple #4
0
    def __init__(self):
        """Prepare the training-output directory and build the logger."""
        target = self.output_path
        # Lazily create the directory that will hold training outputs.
        if not os.path.exists(target):
            os.makedirs(target)

        # File logger at the configured log path.
        self.logger = get_logger(self.log_path)
Exemple #5
0
    def __init__(self, override_flags=None, load=True):
        """Apply flag overrides, prepare output paths, optionally load weights.

        Args:
            override_flags: optional object whose attributes override this
                instance's defaults (copied one-by-one with setattr).
            load: when True, call ``self.load()`` after paths are set up.
        """
        if override_flags:
            # vars() is the idiomatic way to iterate an object's attributes.
            for attr, value in vars(override_flags).items():
                setattr(self, attr, value)

        # exist_ok=True avoids the check-then-create race of an explicit
        # os.path.exists() guard.
        os.makedirs(self.dir_output, exist_ok=True)

        # os.path.join instead of string "+" keeps paths well-formed even if
        # dir_output carries a trailing separator.
        self.dir_model = os.path.join(self.dir_output, "model.weights")
        self.path_log = os.path.join(self.dir_output, "log.txt")
        self.logger = get_logger(self.path_log)
        self.result_log = os.path.join(self.dir_output, "results.log")

        if load:
            self.load()
Exemple #6
0
                    help="Iterations to initialize transport map")
# CLI knobs: training length, evaluation batch size, and bookkeeping cadence.
parser.add_argument("--n_train", type=int, required=True)
parser.add_argument("--b_final_eval",
                    type=int,
                    default=1000,
                    help="Batch size for final evaluation")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_covariance_w_tpreg_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"cov_tpreg_ninit_{args.n_init}_ntrain_{args.n_train}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots for the
# same exp_name — confirm this is intended.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
Exemple #7
0
# Load the word/tag vocabularies produced by the preprocessing step.
vocab_words = load_vocab(config.words_filename)
vocab_tags = load_vocab(config.tags_filename)

# get processing functions
# Each maps a raw token (or tag) to its integer id; words are optionally
# lowercased per config, tags never are.
processing_word = get_processing_word(vocab_words, lowercase=config.lowercase)
processing_tag = get_processing_word(vocab_tags, lowercase=False)

# get pre trained embeddings

embeddings = get_trimmed_glove_vectors(config.trimmed_filename)

# create dataset
# max_iter caps the number of sentences read from each split.
dev = CoNLLDataset(config.dev_filename, processing_word, processing_tag,
                   config.max_iter)
test = CoNLLDataset(config.test_filename, processing_word, processing_tag,
                    config.max_iter)
train = CoNLLDataset(config.train_filename, processing_word, processing_tag,
                     config.max_iter)

# get logger
logger = get_logger(config.log_path)

# build model
model = NERModel(config, embeddings, ntags=len(vocab_tags), logger=logger)
model.build()

# train, evaluate and interact
model.train(train, dev, vocab_tags)
#model.evaluate(test, vocab_tags)
#model.interactive_shell(vocab_tags, processing_word)
Exemple #8
0
                    default=1000,
                    help="Batch size for final evaluation")
# CLI knobs: flow inflation factor and bookkeeping cadence.
parser.add_argument("--lambda_par",
                    type=float,
                    required=True,
                    help="Inflating factor for the exponential flow")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_exp_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"exp_ninit_{args.n_init}_ntrain_{args.n_train}_lambda_{args.lambda_par}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
    required=True,
    help="Maximal # of iterations in the inner supervised loop")
# CLI knobs: Sinkhorn iteration count and bookkeeping cadence.
parser.add_argument("--inner_sink_iter",
                    type=int,
                    required=True,
                    help="# of Sinkhorn iterations")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("supervised_dual_space")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"supervised_prob_space_{args.n_init}_ntrain_{args.n_train}_ntraintmap_{args.n_train_tmap}_eps_{args.epsilon}_mxinneriter_{args.max_inner_iter}_innsink_{args.inner_sink_iter}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
    required=True,
    help="Number of 'closest' points used in computing the discrepancy")
# CLI knobs: final-eval batch size and bookkeeping cadence.
parser.add_argument("--b_final_eval",
                    type=int,
                    default=1000,
                    help="Batch size for final evaluation")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_discr_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"discr_ninit_{args.n_init}_ntrain_{args.n_train}_lambda_{args.lambda_par}_cutoff_{args.cutoff_par}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
Exemple #11
0
# CLI knobs: transport-map training, regularization type/aggregation, and
# bookkeeping cadence.
parser.add_argument("--n_train_tmap", type=int, required=True,
                    help="Training iterations for the transport map")
parser.add_argument("--b_final_eval", type=int, default=1000, help="Batch size for final evaluation")
parser.add_argument("--epsilon", type=float, required=True,
                    help="epsilon for regularization")
parser.add_argument("--regularization", type=str, required=True,
                    choices=["ent", "l2"])
parser.add_argument("--reg_sum_or_mean", type=str, required=True,
                    choices=["sum", "mean"],
                    help="Apply regularization as mean (the math is right) or Seguy (sum, maybe their math is wrong?)")
parser.add_argument("--crayon_send_stats_iters", type=int, default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved", type=int, default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("dual_transport_seguy")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"dual_seguy_{args.n_init}_ntrain_{args.n_train}_ntraintmap_{args.n_train_tmap}_eps_{args.epsilon}_regtype_{args.regularization}_regagg_{args.reg_sum_or_mean}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
Exemple #12
0
    required=True,
    help="Maximal # of iterations in the inner supervised loop")
# CLI knobs: Sinkhorn iteration count and bookkeeping cadence.
parser.add_argument("--inner_sink_iter",
                    type=int,
                    required=True,
                    help="# of Sinkhorn iterations")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("supervised_map")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"supervised_map_{args.n_init}_ntrain_{args.n_train}_eps_{args.epsilon}_mxinneriter_{args.max_inner_iter}_innsink_{args.inner_sink_iter}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
Exemple #13
0
 def __init__(self):
     """Create the output directory if needed and build the logger."""
     out_dir = self.output_path
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)
     # Logger writes to the path configured on this object.
     self.logger = get_logger(self.log_path)
Exemple #14
0
                    help="Inflating factor for the critic loss")
# CLI knobs: critic schedule, gradient clipping, and bookkeeping cadence.
parser.add_argument("--n_critic",
                    type=int,
                    required=True,
                    help="Number of critic iterations in the inner loop")
parser.add_argument("--grad_clip", type=float, default=None)
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("adversarial_transport")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"adversarial_transport_ninit_{args.n_init}_ntrain_{args.n_train}_lambda_{args.lambda_critic}_ncritic_{args.n_critic}_gradclip_{args.grad_clip}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
                    help="Iterations to initialize transport map")
# CLI knobs: training length, evaluation batch size, and bookkeeping cadence.
parser.add_argument("--n_train", type=int, required=True)
parser.add_argument("--b_final_eval",
                    type=int,
                    default=1000,
                    help="Batch size for final evaluation")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_covariance_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"cov_ninit_{args.n_init}_ntrain_{args.n_train}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
import json
import pickle
import os
import torch

from config import get_train_args
from training import Training
from general_utils import get_logger

# Parse training arguments, prepare the output directory, and log the config.
args = get_train_args()
if not os.path.exists(args.output_path):
    os.makedirs(args.output_path)
logger = get_logger(args.log_path)
logger.info(json.dumps(args.__dict__, indent=4))

# Reading the int indexed text dataset
# Each split was serialized with torch.save under <input>/<save_data>.*.pth.
train_data = torch.load(os.path.join(args.input,
                                     args.save_data + ".train.pth"))
dev_data = torch.load(os.path.join(args.input, args.save_data + ".valid.pth"))
test_data = torch.load(os.path.join(args.input, args.save_data + ".test.pth"))
unlabel_data = torch.load(
    os.path.join(args.input, args.save_data + ".unlabel.pth"))

# Reading the word vocab file
# presumably id2w maps integer ids back to word strings — verify against
# the preprocessing script that wrote the pickle.
with open(os.path.join(args.input, args.save_data + '.vocab.pickle'),
          'rb') as f:
    id2w = pickle.load(f)

# Reading the label vocab file
with open(os.path.join(args.input, args.save_data + '.label.pickle'),
          'rb') as f:
# CLI knobs: run bookkeeping (date, base output dirs, seed) plus the
# flow/discrepancy hyper-parameters for this experiment.
parser.add_argument("--today", type=str, default=None)
parser.add_argument("--base_plots_dir", type=str, default="~/opt_w_dl_plots_1")
parser.add_argument("--base_models_dir", type=str, default="~/opt_w_dl_models_1")
parser.add_argument("--base_evaluation_dir", type=str, default="~/opt_w_dl_evaluation_1")
parser.add_argument("--numpy_seed", type=int, default=15)
parser.add_argument("--n_init", type=int, required=True, help="Iterations to initialize transport map")
parser.add_argument("--n_train", type=int, required=True)
parser.add_argument("--lambda_par", type=float, required=True, help="Inflating factor for the exponential flow")
parser.add_argument("--cutoff_par", type=int, required=True, help="Number of 'closest' points used in computing the discrepancy")
parser.add_argument("--b_final_eval", type=int, default=1000, help="Batch size for final evaluation")
parser.add_argument("--crayon_send_stats_iters", type=int, default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved", type=int, default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_discr_w_tpreg_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"discr_tpreg_ninit_{args.n_init}_ntrain_{args.n_train}_lambda_{args.lambda_par}_cutoff_{args.cutoff_par}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)
                    default=1000,
                    help="Batch size for final evaluation")
# CLI knobs: flow inflation factor and bookkeeping cadence.
parser.add_argument("--lambda_par",
                    type=float,
                    required=True,
                    help="Inflating factor for the exponential flow")
parser.add_argument("--crayon_send_stats_iters",
                    type=int,
                    default=20,
                    help="How many iters we send stats to tensorboard")
parser.add_argument("--n_models_saved",
                    type=int,
                    default=50,
                    help="How many snapshot of intermediate models we save")
args = parser.parse_args()
logger = get_logger("heuristic_exp_w_tpreg_flow")

# Default the experiment date to today (ISO format) if not given on the CLI.
if args.today is None:
    args.today = datetime.date.today().strftime("%Y-%m-%d")

logger.info(f"Using experiment date: {args.today}")

# Experiment Name
# Encodes key hyper-parameters so separate runs are distinguishable on disk.
exp_name = f"exp_tpreg_ninit_{args.n_init}_ntrain_{args.n_train}_lambda_{args.lambda_par}_{args.today}"
logger.info(f"Experiment name: {exp_name}")

# create directories for plots, models & evaluation
# NOTE(review): delete_if_exists=True wipes a previous run's plots — confirm.
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)