def load(self, task_name=None):
    if self.name == 'tf':
        from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_debug
        set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
        set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))

    base_pkg_name = 'baseline.{}'.format(self.name)
    # Backends may not be downloaded to the cache; they must exist locally
    mod = import_user_module(base_pkg_name)
    import_user_module('baseline.{}.optz'.format(self.name))
    import_user_module('baseline.{}.embeddings'.format(self.name))
    import_user_module('mead.{}.exporters'.format(self.name))
    if task_name is not None:
        try:
            import_user_module(f'{base_pkg_name}.{task_name}')
        except ImportError:
            logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
    self.transition_mask = mod.transition_mask
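# A minimal usage sketch (not from the original): both TF knobs above are read
# from the environment inside load(), so they must be set before load() runs.
# The Backend import and constructor below are assumptions about where this
# method lives in mead-baseline.
import os

from mead.tasks import Backend  # assumed home of this load() method

os.environ["MEAD_TF_LOG_LEVEL"] = "WARN"    # overrides the "ERROR" default
os.environ["MEAD_TF_EAGER_DEBUG"] = "TRUE"  # parsed via str2bool; default "FALSE"

backend = Backend('tf')
backend.load(task_name='classify')  # also tries to import baseline.tf.classify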
    default='../data/stsa.binary.test')
parser.add_argument('--embeddings', help='Pretrained embeddings file',
                    default='/data/embeddings/GoogleNews-vectors-negative300.bin')
parser.add_argument('--ll', help='Log level', type=str, default='info')
parser.add_argument('--lr', help='Learning rate', type=float, default=0.001)
parser.add_argument('--tf_ll', help='TensorFlow Log level', type=str, default='warn')
args = parser.parse_known_args()[0]

logging.basicConfig(level=get_logging_level(args.ll))
set_tf_log_level(args.tf_ll)

feature_desc = {
    'word': {
        'vectorizer': baseline.Token1DVectorizer(mxlen=100, transform_fn=baseline.lowercase),
        'embed': {
            'file': args.embeddings,
            'type': 'default',
            'unif': 0.25
        }
    }
}

vectorizers = {k: v['vectorizer'] for k, v in feature_desc.items()}
reader = baseline.TSVSeqLabelReader(
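# A minimal sketch (not part of the original script) of the vectorizer contract
# assumed above: count() feeds vocab construction, and run() maps tokens to a
# fixed-length id vector plus a valid length. The tiny vocab here is
# hypothetical, and the count()/run() signatures are assumptions about
# baseline's Vectorizer API.
vec = baseline.Token1DVectorizer(mxlen=100, transform_fn=baseline.lowercase)
counts = vec.count(['The', 'movie', 'was', 'good'])  # lowercased token counts
vocab = {'<PAD>': 0, 'the': 1, 'movie': 2, 'was': 3, 'good': 4}
ids, length = vec.run(['The', 'movie'], vocab)       # ids zero-padded to mxlen=100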
args = parser.parse_known_args()[0]

embed_type = 'learned-positional' if args.transformer else 'default'
feature_desc = {
    'word': {
        'vectorizer': baseline.Token1DVectorizer(mxlen=-1, transform_fn=baseline.lowercase),
        'embed': {
            'embed_file': args.embeddings,
            'embed_type': embed_type,
            'unif': 0.05
        }
    }
}

set_tf_log_level('ERROR')
vectorizers = {k: v['vectorizer'] for k, v in feature_desc.items()}
reader = baseline.LineSeqReader(vectorizers, nctx=args.nctx)

train_file = args.train
valid_file = args.valid
test_file = args.test

# This builds a set of counters
vocabs = reader.build_vocab([train_file, valid_file, test_file])

# This builds a set of embeddings objects; these are typically not DL-specific,
# but if they happen to be addons, they can be
embeddings = dict()
for k, v in feature_desc.items():
    embed_config = v['embed']
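# The loop above is cut off at the section boundary. As a sketch only: in
# mead-baseline's API examples a loop like this typically finishes by loading
# each embedding against the counted vocab, roughly as below. Treat
# baseline.load_embeddings() and its keyword names as assumptions.
#
#     embeddings_for_k = baseline.load_embeddings('word',
#                                                 embed_file=embed_config['embed_file'],
#                                                 embed_type=embed_config['embed_type'],
#                                                 unif=embed_config['unif'],
#                                                 known_vocab=vocabs[k])
#     embeddings[k] = embeddings_for_k['embeddings']
#     vocabs[k] = embeddings_for_k['vocab']  # restrict vocab to what the file covers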