Example #1
def handle_command(command, channel, model, embeddings, metadata):
    if command.startswith("###welcome"):
        # Strip the leading "###" marker; the echoed text keeps "welcome ...".
        response = command[3:]
    else:
        sentence = command.strip()
        if "who are you" in sentence.lower():
            response = "read this :nerd_face: http://arxiv.org/abs/1506.05869"
        else:
            response = utils.get_translation(sess, model, sentence, embeddings,
                                             _buckets, metadata)
    slack_client.api_call("chat.postMessage",
                          channel=channel,
                          text=response,
                          as_user=True)
Example #2
    async def translation(self, ctx: Context, *args):
        language_config = get_language_config_by_id(ctx.guild.id)

        if args:

            if args[0] not in get_possible_translations(ctx.guild.id):
                await ctx.send(embed=Embed(
                    description=language_config.not_supported_translation,
                    color=0xff0000))
                return

            translation = config.translation
            translation[str(ctx.guild.id)] = args[0]
            config.save("translation", translation)

            await ctx.send(embed=Embed(
                description=language_config.f_translation_successfully_changed(
                    get_translation(ctx.guild.id)),
                color=0x34bdeb))

            return

        prefix = await get_prefix(ctx.message)
        embed = Embed(
            title=language_config.f_translation_settings_embed_title(
                ctx.guild.name),
            description=language_config.f_translation_settings_embed_description(
                get_translation(ctx.guild.id), prefix),
            color=0x34bdeb)

        embed.add_field(name=language_config.supported_translations,
                        value="\n".join([
                            f"`{i}`"
                            for i in get_possible_translations(ctx.guild.id)
                        ]))

        await ctx.send(embed=embed)
Example #3
    async def on_message(self, message: Message):
        content = message.content
        queries = get_bible_queries(content)

        for query in queries:
            translation = get_language_config_by_id(message.guild.id)
            book = query.split(" ")[0]

            try:
                # Unknown book abbreviations raise KeyError, handled below.
                book_name = translation._translations[book]
                try:
                    request = BibleRequest(book_name,
                                           query.split(" ")[1],
                                           get_translation(message.guild.id))
                except JSONDecodeError:
                    await message.channel.send(embed=Embed(
                        description=translation.quote_not_available,
                        color=0xff0000))
                    continue

                embed = Embed(title=request.book_name + " " +
                              str(request.chapter))

                embed.set_footer(text="Bible quoted using getbible.net",
                                 icon_url=self.bot.user.avatar_url)
                embed.timestamp = datetime.utcnow()

                for verse in request.verses.keys():
                    embed.add_field(name=str(verse),
                                    value=request.verses[verse],
                                    inline=False)

                await message.channel.send(embed=embed)

            except KeyError:
                await message.channel.send(embed=Embed(
                    description=translation.book_abbr_not_available + "\n" +
                    translation.book_abbr_info + "\n[" +
                    translation.abbreviation_url + "](" +
                    translation.abbreviation_url + ")",
                    color=0xff0000))
Example #4
def run_sample_decode(infer_model, infer_sess, hparams, src_data, tgt_data):
    sample_id = random.randint(0, len(src_data) - 1)
    with infer_model.graph.as_default():
        infer_model.model.reinitializer_or_load_model(infer_sess,
                                                      hparams.model_dir)

        iterator_feed_dict = {
            infer_model.src_placeholder: [src_data[sample_id]],
            infer_model.batch_size_placeholder: 1
        }
        infer_sess.run(infer_model.iterator.initializer,
                       feed_dict=iterator_feed_dict)
        nmt_outputs = infer_model.model.decode(infer_sess)
    if hparams.beam_width > 0:
        # With beam search, the top-scoring beam comes first.
        nmt_outputs = nmt_outputs[0]
    translation = utils.get_translation(nmt_outputs, 0, EOS)
    print('src: %s' % src_data[sample_id])
    print('ref: %s' % tgt_data[sample_id])
    print('nmt: %s' % translation)
Example #5
def translate(hparams, string):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    infer_model = create_nmt_model.create_infer_model(hparams)
    infer_sess = tf.Session(graph=infer_model.graph, config=config)
    with infer_model.graph.as_default():
        infer_model.model.reinitializer_or_load_model(infer_sess,
                                                      hparams.model_dir)

        iterator_feed_dict = {
            infer_model.src_placeholder: [string],
            infer_model.batch_size_placeholder: 1
        }
        infer_sess.run(infer_model.iterator.initializer,
                       feed_dict=iterator_feed_dict)
        nmt_outputs = infer_model.model.decode(infer_sess)
    if hparams.beam_width > 0:
        nmt_outputs = nmt_outputs[0]
    translation = utils.get_translation(nmt_outputs, 0, hparams.eos)
    return translation
Example #6
    async def settings(self, ctx: Context):
        """
        Manage the Settings of BibleBot
        """
        if ctx.invoked_subcommand is None:
            message = await ctx.send(embed=Embed(
                description="Gathering Information ..."))

            settings = get_language_config_by_id(ctx.guild.id)

            prefix = await get_prefix(ctx.message)

            embed = Embed(
                title=settings.f_general_settings_embed_title(ctx.guild.name),
                description=settings.f_general_settings_embed_description(
                    prefix),
                color=0x34bdeb)

            embed.add_field(
                name=settings.general_settings_embed_field_prefix_title,
                value=settings.f_general_settings_embed_field_prefix_description(
                    prefix, prefix))

            embed.add_field(
                name=settings.general_settings_embed_field_language_title,
                value=settings.f_general_settings_embed_field_language_description(
                    get_language(ctx.guild.id), prefix))

            embed.add_field(
                name=settings.general_settings_embed_field_translation_title,
                value=settings.f_general_settings_embed_field_translation_description(
                    get_translation(ctx.guild.id), prefix))

            await message.edit(embed=embed)
Example #7
__version__ = '1.0'
__license__ = "GNU AGPLv3 or Proprietary (see LICENSE.txt)"
__version_info__ = (1, 0)
__author__ = 'Dan McDougall <*****@*****.**>'

# Python stdlib
import os, sys, logging, re, time
from datetime import datetime, timedelta
from functools import partial

# Our stuff
from gateone import BaseHandler
from utils import get_translation, mkdir_p, shell_command, which, json_encode
from utils import noop

_ = get_translation()

# Tornado stuff
import tornado.web
import tornado.ioloop

# Globals
OPENSSH_VERSION = None
DROPBEAR_VERSION = None
OPEN_SUBCHANNELS = {}
SUBCHANNEL_TIMEOUT = timedelta(
    minutes=5)  # How long to wait before auto-closing
READY_STRING = "GATEONE_SSH_EXEC_CMD_CHANNEL_READY"
READY_MATCH = re.compile("^%s$" % READY_STRING, re.MULTILINE)
OUTPUT_MATCH = re.compile("^{rs}.+^{rs}$".format(rs=READY_STRING),
                          re.MULTILINE | re.DOTALL)
Example #8
before I attempt to get a custom service principal working with other
applications.  This is because SSH uses the HOST/ principal which is often
taken care of automatically via most Kerberos management tools (including AD).
If you can get SSO working with SSH you can get SSO working with anything else.
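
For a quick sanity check that this server's own service principal is actually
present in the keytab, the ``kerberos`` module (imported below) can be asked
directly.  A minimal sketch -- the service and hostname values here are
placeholders::

    import kerberos
    # Looks up the matching HTTP/www.example.com principal in the keytab:
    details = kerberos.getServerPrincipalDetails('HTTP', 'www.example.com')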

Class Docstrings
================
"""

# Standard library modules
import logging, base64

# Import our own stuff
from utils import get_translation
# Enable localization support
_ = get_translation()

# 3rd party modules
import tornado.httpserver
import tornado.ioloop
import tornado.web
import kerberos

# NOTE: For some reason if I set this as just an 'object' it doesn't work.
class KerberosAuthMixin(tornado.web.RequestHandler):
    """
    Authenticates users via Kerberos-based Single Sign-On.  Requires that you
    define 'sso_realm' and 'sso_service' in your Tornado Application settings.
    For example::

        settings = dict(
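            sso_realm="EXAMPLE.COM",  # placeholder realm (an assumption)
            sso_service="HTTP")       # assumed value; the docstring names only the keys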
Example #9
def main():
    start = time.time()
    # NOTE: `args` is the module-level ArgumentParser, so `parser` here
    # actually holds the parsed arguments.
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use the GPU when available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.' + source_name,
        'target': main_data_path + 'train.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.' + source_name,
        'target': main_data_path + 'dev.' + target_name
    }
    # get language objects
    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    # get dictionary of datasets
    dataset_dict = {
        'train':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_train_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1),
        'dev':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_dev_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1)
    }

    # cap sentence length at the 99.99th percentile of source lengths
    MAX_LEN = int(dataset_dict['train'].main_df['source_len'].quantile(0.9999))
    log.info("MAX_LEN (99.99th percentile) = {}".format(MAX_LEN))
    batchSize = parser.batch_size
    log.info("Batch size = {}.".format(batchSize))

    dataloader_dict = {
        'train':
        DataLoader(dataset_dict['train'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0),
        'dev':
        DataLoader(dataset_dict['dev'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0)
    }

    # Configuration
    source_lang_obj = dataset_dict['train'].source_lang_obj
    target_lang_obj = dataset_dict['train'].target_lang_obj

    source_vocab = dataset_dict['train'].source_lang_obj.n_words
    target_vocab = dataset_dict['train'].target_lang_obj.n_words
    hidden_size = parser.hidden_size
    rnn_layers = parser.rnn_layers
    lr = parser.learning_rate
    longest_label = parser.longest_label
    gradient_clip = parser.gradient_clip
    num_epochs = parser.epochs

    log.info(
        "The source vocab ({}) has {} words and target vocab ({}) has {} words"
        .format(source_name, source_vocab, target_name, target_vocab))

    # encoder model
    encoder_rnn = nnet_models_new.EncoderRNN(input_size=source_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)
    # decoder model
    decoder_rnn = nnet_models_new.DecoderRNN(output_size=target_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)

    # seq2seq model
    nmt_rnn = nnet_models_new.seq2seq(
        encoder_rnn,
        decoder_rnn,
        lr=lr,
        hiddensize=hidden_size,
        numlayers=rnn_layers,
        target_lang=dataset_dict['train'].target_lang_obj,
        longest_label=longest_label,
        clip=gradient_clip,
        device=device)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, hidden_size = {}, rnn_layers = {}, longest_label = {}, gradient_clip = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(batchSize, lr, hidden_size, rnn_layers, longest_label,
                gradient_clip, num_epochs, source_name, target_name))

    # do we want to train again?
    train_again = False

    saved_file_name = 'no_attn_bs{}_lr{}_hs_{}_rnnlayer{}'.format(
        batchSize, lr, hidden_size, rnn_layers)

    # check if there is a saved model and if we want to train again
    if os.path.exists(utils.get_full_filepath(saved_models_dir,
                                              'rnn')) and (not train_again):
        log.info("Retrieving saved model from {}".format(
            utils.get_full_filepath(saved_models_dir, 'rnn')))
        nmt_rnn = torch.load(utils.get_full_filepath(saved_models_dir, 'rnn'),
                             map_location=global_variables.device)
    # otherwise train (no saved model, or retraining was requested)
    else:
        log.info("No saved model found at {} (or retraining requested). "
                 "Starting to train...".format(
                     utils.get_full_filepath(saved_models_dir,
                                             saved_file_name)))
        utils.train_model(dataloader_dict,
                          nmt_rnn,
                          num_epochs=num_epochs,
                          saved_model_path=saved_models_dir,
                          enc_type=saved_file_name)
    log.info("Total time is: {} min : {} s".format((time.time() - start) // 60,
                                                   (time.time() - start) % 60))
    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))

    # generate sample translations (use CUDA only if it is actually available)
    use_cuda = torch.cuda.is_available()
    utils.get_translation(nmt_rnn, 'I love to watch science movies on Mondays',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn,
                          'I want to be the best friend that I can be',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'I love you', source_lang_obj, use_cuda,
                          source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I love football, I like to watch it with my friends. It is always a great time.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I do not know what I would do without pizza, it is very tasty to eat. If I could have any food in the world it would probably be pizza.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'Trump is the worst president in all of history. He can be a real racist and say very nasty things to people of color.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Thank you very much.', source_lang_obj,
                          use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Think about your own choices.',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I recently did a survey with over 2,000 Americans , and the average number of choices that the typical American reports making is about 70 in a typical day .',
        source_lang_obj, use_cuda, source_name, target_name)

    # export plot
    _, _, fig = utils.get_binned_bl_score(nmt_rnn,
                                          dataset_dict['dev'],
                                          plots_dir,
                                          batchSize=batchSize)
    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
Example #10
from utils import get_translation, Generator, sp
import sentencepiece as spm

data_token_path = "./en_hi_t2t_v3/token_data/"
model_checkpoint = "./en_hi_t2t_v3/checkpoints/checkpoint9.pt"
sp_path = "./sentencepiece.bpe.model"
srclang = "en"  # set to "hi" for Hindi-to-English (hi2en)

gen = Generator(data_token_path, model_checkpoint)
print("Model Checkpoint Load Complete")
sp.load(sp_path)
print("Tokenizer Load Complete\n")

while True:
    text = input("English Text : ")
    outtext = get_translation(gen, sp, text, srclang)
    print("Hindi Text : %s\n" % outtext)
Example #11
from tornado.options import define, options
# You need 'options' to get define()'d values

# Globals
SESSIONS = {}  # This will get replaced with gateone.py's SESSIONS dict
# NOTE: The overwriting of SESSIONS happens inside of gateone.py as part of
# the application initialization process.
APPLICATION_PATH = os.path.split(__file__)[0]  # Path to our application
web_handlers = []  # Populated at the bottom of this file
example_log = go_logger("gateone.example")  # Our app's logger
# NOTE: You can pass additional metadata to logs which will be JSON-encoded
# when your messages are logged.  Examples of how to do this are further along
# in this file...

# Localization support
_ = get_translation()  # You don't *have* to do this but it is a good idea

# This is how you add command-line options to Gate One:
define(
    "example_option",  # NOTE: underscores are the preferred word separator here
    default=True,
    help=_("Doesn't do anything (just an example from the example app)."))
# You could then reference this option like so:
# print(options.example_option)

ALLOW = True  # Used by policy_test_example() below


def policy_test_example(cls, policy):
    """
    An example policy-checking function.  It will return ``True`` if conditions
    are met.  ``False`` if not.
Example #12
    def getTranslation(self, text):
        return get_translation(self.gen, self.tokObj.sp, text, "hi")