Ejemplo n.º 1
0
def get_distribution_strategy(params):
  """Returns the distribution strategy to use.

  Args:
    params: dict of run options. Reads "turn_off_distribution_strategy",
      "use_tpu", and either the TPU connection keys ("tpu", "tpu_zone",
      "tpu_gcp_project") or "num_gpus".

  Returns:
    None when distribution is disabled, a TPUStrategy when running on TPU,
    otherwise whatever distribution_utils picks for the GPU count.
  """
  if params["turn_off_distribution_strategy"]:
    return None

  if not params["use_tpu"]:
    # CPU/GPU path: delegate strategy selection to the shared helper.
    return distribution_utils.get_distribution_strategy(
        num_gpus=params["num_gpus"])

  # Some of the networking libraries are quite chatty.
  chatty_loggers = (
      "googleapiclient.discovery",
      "googleapiclient.discovery_cache",
      "oauth2client.transport",
  )
  for logger_name in chatty_loggers:
    logging.getLogger(logger_name).setLevel(logging.ERROR)

  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      tpu=params["tpu"],
      zone=params["tpu_zone"],
      project=params["tpu_gcp_project"],
      coordinator_name="coordinator"
  )

  logging.info("Issuing reset command to TPU to ensure a clean state.")
  tf.Session.reset(resolver.get_master())

  # Estimator looks at the master it connects to for MonitoredTrainingSession
  # by reading the `TF_CONFIG` environment variable, and the coordinator
  # is used by StreamingFilesDataset.
  master = resolver.get_master()
  os.environ['TF_CONFIG'] = json.dumps({
      "session_master": master,
      "eval_session_master": master,
      "coordinator": resolver.cluster_spec().as_dict()["coordinator"],
  })

  return tf.distribute.experimental.TPUStrategy(resolver, steps_per_run=100)
Ejemplo n.º 2
0
from .tokenization_ctrl import CTRLTokenizer
from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
from .tokenization_flaubert import FlaubertTokenizer
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
from .tokenization_t5 import T5Tokenizer
from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer, TransfoXLTokenizerFast

# Tokenizers
from .tokenization_utils import PreTrainedTokenizer
from .tokenization_xlm import XLMTokenizer
from .tokenization_xlm_roberta import XLMRobertaTokenizer
from .tokenization_xlnet import SPIECE_UNDERLINE, XLNetTokenizer

# Module-level logger; invalid-name is suppressed because pylint expects
# UPPER_CASE for module constants.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# The GLUE/XNLI metric helpers depend on scikit-learn, so expose them only
# when it is installed.
if is_sklearn_available():
    from .data import glue_compute_metrics, xnli_compute_metrics

# Modeling
if is_torch_available():
    from .modeling_utils import PreTrainedModel, prune_layer, Conv1D, top_k_top_p_filtering
    from .modeling_auto import (
        AutoModel,
        AutoModelForPreTraining,
        AutoModelForSequenceClassification,
        AutoModelForQuestionAnswering,
        AutoModelWithLMHead,
        AutoModelForTokenClassification,
        ALL_PRETRAINED_MODEL_ARCHIVE_MAP,
Ejemplo n.º 3
0
# Logging options ##########################################################'
# Detach absl's auto-installed handler so basicConfig below fully controls
# formatting, and silence absl's pre-init stderr warning.
logging.root.removeHandler(absl.logging._absl_handler)
absl.logging._warn_preinit_stderr = False
# NOTE(review): `pd.datetime` is deprecated/removed in recent pandas;
# consider `datetime.datetime.now()` — confirm the pinned pandas version.
date = pd.datetime.now().date()
hour = pd.datetime.now().hour
minute = pd.datetime.now().minute
# Log both to a timestamped file and (via the extra handler below) to the
# console, with identical formats.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)-15s %(name)-5s %(levelname)-8s %(message)s',
    filename="image2seq/logs/train_log_{}_{}{}.txt".format(date, hour, minute))
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter(
    '%(asctime)-15s %(name)-5s %(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger("").addHandler(console)

#############################################################################
# Model Setup                                                               #
#############################################################################
# BUG FIX: the original string had no "{}" placeholder, so .format() silently
# discarded the TensorFlow version.
logging.info("MODEL SETUP - Tensorflow version {}".format(tf.__version__))
logging.info("MODEL SETUP - Training Script - train_nested.py")
from tensorflow.python.client import device_lib
logging.info("MODEL SETUP - CUDA VISIBLE DEVICES {}".format(
    device_lib.list_local_devices()))
# Fail fast if no GPU / CUDA build is available.
# NOTE(review): `tf.test.is_gpu_available` is deprecated in TF2 in favor of
# `tf.config.list_physical_devices('GPU')` — confirm the pinned TF version.
tf.compat.v1.debugging.assert_equal(True, tf.test.is_gpu_available())
tf.compat.v1.debugging.assert_equal(True, tf.test.is_built_with_cuda())

image2seq = DRAKENESTEDSINGLELSTM()
logging.info("MODEL SETUP - image2seq model {} instantiated".format(
    image2seq.get_model_name()))
Ejemplo n.º 4
0
import hanlp.losses
import hanlp.metrics
import hanlp.optimizers
import hanlp.pretrained
import hanlp.utils

from hanlp.version import __version__

import os

# Quiet TensorFlow/absl logging unless the user explicitly opts in.
if not os.environ.get('HANLP_SHOW_TF_LOG', None):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
    import absl.logging, logging

    logging.getLogger('tensorflow').setLevel(logging.ERROR)
    logging.root.removeHandler(absl.logging._absl_handler)
    # Plain assignment instead of the original exec(...): at module level
    # exec runs in module globals anyway, so this is runtime-identical and
    # visible to linters/static analysis.  Prevents absl's pre-init stderr
    # warning.
    absl.logging._warn_preinit_stderr = False

# Cooperatively share GPUs unless the user asks for greedy allocation.
if not os.environ.get('HANLP_GREEDY_GPU', None):
    from hanlp.utils.tf_util import nice_gpu
    nice_gpu()

# List the pretrained resources bundled with the package at import time.
from hanlp.utils.util import ls_resource_in_module
ls_resource_in_module(hanlp.pretrained)


def load(save_dir,
Ejemplo n.º 5
0
from absl import flags
from absl import logging
import numpy as np
import json
import logging
# http 接口
from flask import Flask, jsonify, request
import bert_example
import predict_utils
import tagging_converter
import utils
import tensorflow as tf

# BUG FIX: `os` and `sys` are used below but were never imported in this
# module.
import os
import sys

app = Flask(__name__)

logger = logging.getLogger('log')
logger.setLevel(logging.DEBUG)

# Detach any handlers previously attached to this logger.
# BUG FIX: the original `while logger.hasHandlers(): for i in
# logger.handlers: removeHandler(i)` mutated the handler list while
# iterating it, and hasHandlers() also consults ancestor (root) loggers
# whose handlers this loop can never remove — an infinite-loop hazard.
# Iterate over a copy of this logger's own handlers instead.
for handler in list(logger.handlers):
    logger.removeHandler(handler)

user_name = ""  # wzk/
version = "1.0.0.0"

# Put the project root (two levels up from this file) on sys.path so the
# sibling modules imported above resolve when run as a script.
block_list = os.path.realpath(__file__).split("/")
path = "/".join(block_list[:-2])
sys.path.append(path)

# FLAGS = flags.FLAGS
FLAGS = tf.app.flags.FLAGS