Example #1
    def load(self, task_name=None):
        # Import the framework-specific (e.g. 'tf') backend modules by name
        if self.name == 'tf':
            from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_debug
            # Environment variables control TF verbosity and eager debugging
            set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
            set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))

        base_pkg_name = 'baseline.{}'.format(self.name)
        # Backends cannot be downloaded to the cache; they must exist locally
        mod = import_user_module(base_pkg_name)
        import_user_module('baseline.{}.optz'.format(self.name))
        import_user_module('baseline.{}.embeddings'.format(self.name))
        import_user_module('mead.{}.exporters'.format(self.name))
        if task_name is not None:
            # Pull in the task-specific module for this backend, if present
            try:
                import_user_module(f'{base_pkg_name}.{task_name}')
            except ImportError:
                logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
        self.transition_mask = mod.transition_mask
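
For reference, str2bool above turns the MEAD_TF_EAGER_DEBUG string into a
boolean. A minimal sketch of such a helper, assuming the usual truthy
spellings (the real baseline utility may differ):

def str2bool(value):
    # Treat common truthy spellings as True, everything else as False
    return str(value).lower() in ('yes', 'y', 'true', 't', '1')
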
Example #2
parser.add_argument('--test',
                    help='Test data file',
                    default='../data/stsa.binary.test')
parser.add_argument(
    '--embeddings',
    help='Pretrained embeddings file',
    default='/data/embeddings/GoogleNews-vectors-negative300.bin')
parser.add_argument('--ll', help='Log level', type=str, default='info')
parser.add_argument('--lr', help='Learning rate', type=float, default=0.001)
parser.add_argument('--tf_ll',
                    help='TensorFlow Log level',
                    type=str,
                    default='warn')

args = parser.parse_known_args()[0]

logging.basicConfig(level=get_logging_level(args.ll))
set_tf_log_level(args.tf_ll)

# Describe each feature: how to vectorize raw tokens and which pretrained
# embeddings to attach to them
feature_desc = {
    'word': {
        'vectorizer':
        baseline.Token1DVectorizer(mxlen=100, transform_fn=baseline.lowercase),
        'embed': {
            'file': args.embeddings,
            'type': 'default',
            'unif': 0.25
        }
    }
}

vectorizers = {k: v['vectorizer'] for k, v in feature_desc.items()}
reader = baseline.TSVSeqLabelReader(vectorizers)
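
Example #2 maps the --ll string onto a logging constant via get_logging_level.
A minimal sketch of that mapping, assuming the names line up with the stdlib's
level constants (the real helper may validate more strictly):

import logging

def get_logging_level(name, default=logging.INFO):
    # 'info' -> logging.INFO, 'debug' -> logging.DEBUG, etc.,
    # falling back to the default for unrecognized names
    level = getattr(logging, str(name).upper(), default)
    return level if isinstance(level, int) else default
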
Example #3
args = parser.parse_known_args()[0]

# Transformer layers need position information, so use learned positional
# embeddings; otherwise a plain lookup-table embedding is enough
embed_type = 'learned-positional' if args.transformer else 'default'
feature_desc = {
    'word': {
        # mxlen=-1 leaves the maximum sequence length to be inferred from data
        'vectorizer':
        baseline.Token1DVectorizer(mxlen=-1, transform_fn=baseline.lowercase),
        'embed': {
            'embed_file': args.embeddings,
            'embed_type': embed_type,
            'unif': 0.05
        }
    }
}

set_tf_log_level('ERROR')
vectorizers = {k: v['vectorizer'] for k, v in feature_desc.items()}
reader = baseline.LineSeqReader(vectorizers, nctx=args.nctx)

train_file = args.train
valid_file = args.valid
test_file = args.test

# This builds a set of counters
vocabs = reader.build_vocab([train_file, valid_file, test_file])
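# Assuming each entry in vocabs is a collections.Counter keyed by feature
# name, the raw counts can be spot-checked, e.g. (hypothetical):
# print(vocabs['word'].most_common(5))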

# This builds a set of embeddings objects; these are typically not
# DL-framework-specific, though addon embeddings can be
embeddings = dict()
for k, v in feature_desc.items():
    embed_config = v['embed']
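    # In baseline's example scripts the loop typically continues by loading
    # each pretrained embedding against the vocab built above, roughly
    # (load_embeddings and its return keys are assumptions, not confirmed
    # by this snippet):
    #
    #   embeddings_for_k = baseline.embeddings.load_embeddings(
    #       k, known_vocab=vocabs[k], **embed_config)
    #   embeddings[k] = embeddings_for_k['embeddings']
    #   vocabs[k] = embeddings_for_k['vocab']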