예제 #1
0
파일: srl_utils.py 프로젝트: jgung/tf-nlp
def main(opts):
    """
    Entry point of the mappings script.

    Builds a label mapping function from ``opts.frames`` (optionally post-processed by the JSON
    mappings at ``opts.mappings``), creates ``opts.output`` if it does not exist, and runs the
    processor selected by ``opts.mode`` on every non-empty, comma-separated path in ``opts.input``.

    :param opts: options object providing ``frames``, ``mappings``, ``combine``, ``append``,
        ``output``, ``mode`` and ``input``
    """
    mappings = get_argument_function_mappings(opts.frames)

    def _apply_frame_mappings(rs, label):
        # shared by both mapping functions below; expects an already-normalized label
        return apply_numbered_arg_mappings(rs,
                                           label,
                                           mappings,
                                           ignore_unmapped=True,
                                           combine_modifiers=opts.combine,
                                           append=opts.append)

    def mapping_fn(rs, r):
        return _apply_frame_mappings(rs, arg_to_a(r))

    mapping_function = mapping_fn

    if opts.mappings:
        # apply additional mappings to the output of the mapping function; when the mapped label
        # has no entry in the JSON mappings, the normalized input label is returned instead
        json_mappings = _add_c_r_mappings(read_json(opts.mappings))

        def updated_mapping_fn(rs, r):
            label = arg_to_a(r)
            return json_mappings.get(_apply_frame_mappings(rs, label), label)

        mapping_function = updated_mapping_fn
        # tag outputs with the mappings file name, minus directory and extension
        tag = os.path.splitext(os.path.basename(opts.mappings))[0]
    elif opts.combine:
        tag = 'combined'
    elif opts.append:
        tag = 'split'
    else:
        tag = 'core-mod'

    if not os.path.exists(opts.output):
        os.makedirs(opts.output)

    # all three processors are constructed up front, in this order, regardless of the chosen mode
    arg_mapper = CoNllArgMapper(mapping_function, opts.output, tag=tag)
    arg_counter = CoNllArgCounter(mapping_function, opts.output, tag=tag)
    phrase_writer = CoNllPhraseWriter(mapping_function, opts.output, tag=tag)

    mode_map = {
        # 'map' mode runs the mapper and the counter together
        'map': AggregateProcessor([arg_mapper, arg_counter], tag=tag),
        'count': arg_counter,
        'phrases': phrase_writer,
    }
    processor = mode_map[opts.mode]

    print('running mappings script in %s/%s mode...' % (tag, opts.mode))

    # skip empty entries produced by leading, trailing or doubled commas
    for path in filter(None, opts.input.split(',')):
        print('processing %s...' % path)
        processor.process_file(path)
예제 #2
0
파일: trainer.py 프로젝트: jgung/tf-nlp
    def __init__(self,
                 save_dir_path: str,
                 config: Optional[dict] = None,
                 resources_dir_path: Optional[str] = '',
                 script_file_path: Optional[str] = None,
                 model_fn: Optional[TF_MODEL_FN] = multi_head_model_fn,
                 debug: bool = False) -> None:
        """
        Set up paths and configuration for a job directory.

        If no configuration file exists yet under ``save_dir_path``, ``config`` is written there.
        If ``config`` is falsy, the configuration is then read back from that file.

        :param save_dir_path: job directory holding the config, model, vocabulary and data files
        :param config: configuration dictionary; when falsy, it is read from the job directory
        :param resources_dir_path: stored as the resources location
        :param script_file_path: stored as the evaluation script path
        :param model_fn: stored as the model function
        :param debug: stored as the debug flag
        """
        super().__init__()
        self._job_dir = save_dir_path

        # persist the configuration on first use, then fall back to the stored copy if none was given
        stored_config_path = os.path.join(self._job_dir, constants.CONFIG_PATH)
        if not gfile.exists(stored_config_path):
            write_json(config, stored_config_path)
        if not config:
            config = read_json(stored_config_path)
        self._training_config = get_network_config(config)

        # locations derived from the job directory
        self._model_path = os.path.join(self._job_dir, constants.MODEL_PATH)
        self._vocab_path = os.path.join(self._job_dir, constants.VOCAB_PATH)

        # values stored as given
        self._resources = resources_dir_path
        self._eval_script_path = script_file_path
        self._model_fn = model_fn
        self._debug = debug
        self._feature_extractor = None

        def _data_path(orig):
            # <job dir>/<base name of orig>.tfrecords
            return os.path.join(self._job_dir,
                                os.path.basename(orig) + ".tfrecords")

        self._data_path_fn = _data_path
예제 #3
0
def read_config(config_path: str, config_overrides: list,
                param_overrides: str):
    """
    Read a base JSON configuration and combine it with config-file and parameter overrides.

    :param config_path: path to the base JSON configuration
    :param config_overrides: flat list of alternating keys and JSON file paths, i.e.
        ``[key1, path1, key2, path2, ...]``; may be empty or ``None``
    :param param_overrides: comma-separated ``key=value`` pairs; blank entries are skipped and
        everything after the first ``=`` is kept as the value; may be empty or ``None``
    :return: the result of ``build_config`` for the base config and both kinds of overrides
    :raises ValueError: if ``config_overrides`` has an odd number of entries, or if a parameter
        override does not contain ``=``
    """
    base_config = read_json(config_path)

    override_configs = {}
    if config_overrides:
        if len(config_overrides) % 2 == 1:
            raise ValueError(
                'Expecting an even number of values (key, config) pairs')
        for k, config in zip(config_overrides[::2], config_overrides[1::2]):
            override_configs[k] = read_json(config)

    override_params = {}
    if param_overrides:
        for pair in param_overrides.split(','):
            if not pair.strip():
                continue
            # split on the first '=' only, so values may themselves contain '='
            key, separator, value = pair.partition('=')
            if not separator:
                raise ValueError(
                    "Expecting parameter overrides of the form key=value, got '%s'" % pair)
            override_params[key] = value

    return build_config(base_config, override_configs, override_params,
                        config_path)
예제 #4
0
파일: predictor.py 프로젝트: jgung/tf-nlp
def from_config_and_savedmodel(path_to_config: str, path_to_savedmodel: str,
                               path_to_vocab: str) -> Predictor:
    """
    Initialize a predictor from a configuration, saved model, and vocabulary.
    :param path_to_config: path to trainer configuration
    :param path_to_savedmodel: path to TF saved model
    :param path_to_vocab: path to vocabulary directory
    :return: initialized predictor
    """
    config = get_network_config(read_json(path_to_config))

    tf.logging.info("Loading predictor from saved model at %s" %
                    path_to_savedmodel)
    tf_predictor = _default_predictor(path_to_savedmodel)

    # the remaining components are all derived from the network configuration
    parser_function = get_parser(config)
    feature_function = _get_feature_function(config.features, config.heads,
                                             path_to_vocab)
    formatter = get_formatter(config)
    batching_function = default_batching_function(config.batch_size)

    return Predictor(tf_predictor, parser_function, feature_function,
                     formatter, batching_function)
예제 #5
0
 def _sub_str(value):
     """
     Replace a string that names a JSON file with the contents of that file.

     Values that do not end with ``JSON_EXT`` are returned unchanged. Values starting with
     ``"."`` are joined onto ``config_dir`` before being read.
     """
     if not value.endswith(JSON_EXT):
         return value
     json_path = os.path.join(config_dir, value) if value.startswith(".") else value
     return read_json(json_path)
예제 #6
0
import numpy as np

from tfnlp.common.embedding import read_vectors, write_vectors
from tfnlp.common.utils import read_json

if __name__ == '__main__':
    # Build one vector per class in the members JSON by summing the vectors of its members,
    # then write the resulting class vectors to a new vector file.
    vectors, dim = read_vectors('data/vectors/glove.6B.100d.txt')
    print("Read %d %d-length vectors" % (len(vectors), dim))
    vn_members = read_json('data/config/experimental/mappings/vn-members.json')

    result = {}
    for class_name, class_members in vn_members.items():
        # members without a vector are skipped; a class with none keeps the all-zero vector
        class_vector = np.zeros([dim], dtype=np.float32)
        for member_vector in (vectors[m] for m in class_members if m in vectors):
            class_vector += member_vector
        result[class_name] = class_vector

    print("Writing %d vectors" % len(result))
    write_vectors(result, 'data/vectors/vn.glove.6B.100d.txt')