Code example #1
def compare():
    """Compare two sentences separated by a semi-colon"""
    # Load the data frame
    train, dev, test = loader.getData()

    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = 64  # Decode 64 sentences per batch.

        # Load vocabularies.
        en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)

        results = []
        for i, row in train.iterrows():
            try:
                context1 = get_context(sess, model, en_vocab,
                                       row["sentence1"])[0]
                context2 = get_context(sess, model, en_vocab,
                                       row["sentence2"])[0]
            except TypeError:
                print "Error on line %i" % i
                continue
            cosine_distance = cosine(context1, context2)
            euclid_distance = np.linalg.norm(context1 - context2)
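            # Heuristic threshold: pairs closer than 10 in Euclidean distance
            # are predicted to be equivalent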
            prediction = euclid_distance < 10
            correctness = prediction == row["label"]
            results.append(correctness)
            print "%i,  %i,   %.3f" % (row["label"], prediction,
                                       euclid_distance)
            # Print the accuracy so far
            if i % 10 == 0:
                print "Correctness:", np.mean(results)
        results = np.array(results)
        print(np.mean(results))
Code example #2
    def __str__(self):
        if len(self.simgr.active) > 0:
            if self.state is None:
                return get_context(self.proj, self.simgr.active[0])
            return get_context(self.proj, self.inspect_simgr.active[0])
        else:
            return "No active states."
Code example #3
File: __init__.py Project: ecostadaluz/core
def help():
    """Devolve a pagina de Ajuda"""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'help'
    ctx_dict['title'] = 'Ajuda'


    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    code += """
    Ajuda

    Por Implementar...

    """
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
Code example #4
def load_and_evaluate(args):
    """Load the pretrained model and run evaluate."""
    context = utils.get_context(args)
    embedding, model_idx_to_token = get_model(args)

    idx_to_token_set = evaluation.get_tokens_in_evaluation_datasets(args)
    idx_to_token_set.update(model_idx_to_token)
    idx_to_token = list(idx_to_token_set)

    # Compute their word vectors
    token_embedding = embedding.to_token_embedding(idx_to_token,
                                                   ctx=context[0])

    os.makedirs(args.logdir, exist_ok=True)
    results = evaluation.evaluate_similarity(args,
                                             token_embedding,
                                             context[0],
                                             logfile=os.path.join(
                                                 args.logdir,
                                                 'similarity.tsv'))
    results += evaluation.evaluate_analogy(args,
                                           token_embedding,
                                           context[0],
                                           logfile=os.path.join(
                                               args.logdir, 'analogy.tsv'))
Code example #5
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)

        # Look up the user attached to the current auth session.
        current_session = auth.get_auth().get_user_by_session()
        new_user_object = self.auth.store.user_model.get_by_auth_token(
            current_session['user_id'], current_session['token'])[0]
        username = new_user_object.auth_ids[0]
        old_user_object = models.Account.all().filter(
            'username =', username).fetch(1)[0].__dict__['_entity']

        if 'user' in self.request.GET:
            new_user_object = self.auth.store.user_model.get_by_auth_id(
                self.request.GET['user'])
            old_user_object = models.Account.all().filter(
                'username =', self.request.GET['user']).fetch(1)[0].__dict__['_entity']
            username = new_user_object.auth_ids[0]

        context['current_session'] = current_session
        context['new_user_object'] = new_user_object
        context['user'] = username
        context['old_user_object'] = old_user_object

        path = os.path.join(os.path.dirname(__file__),
                            'templates/auth_test.html')
        self.response.out.write(template.render(path, context))
Code example #6
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)
        resumes = models.Resume.all().fetch(10000)
        context['resumes'] = resumes
        path = os.path.join(os.path.dirname(__file__),
                            'templates/view_resumes.html')
        self.response.out.write(template.render(path, context))
Code example #7
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)
        upload_url = blobstore.create_upload_url('/upload')
        upload_url = upload_url.replace('http://localhost:8080',
                                        self.request.get('host'))
        context['upload_url'] = upload_url
        path = os.path.join(os.path.dirname(__file__), 'templates/apply.html')
        self.response.out.write(template.render(path, context))
Code example #8
def evaluate(args, model, vocab, global_step, eval_analogy=False):
    """Evaluation helper"""
    if 'eval_tokens' not in globals():
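        # Build the evaluation token list only once and cache it as a
        # module-level global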
        global eval_tokens

        eval_tokens_set = evaluation.get_tokens_in_evaluation_datasets(args)
        if not args.no_eval_analogy:
            eval_tokens_set.update(vocab.idx_to_token)

        # GloVe does not support computing vectors for OOV words
        eval_tokens_set = filter(lambda t: t in vocab, eval_tokens_set)

        eval_tokens = list(eval_tokens_set)

    # Compute their word vectors
    context = get_context(args)
    mx.nd.waitall()

    token_embedding = nlp.embedding.TokenEmbedding(unknown_token=None,
                                                   allow_extend=True)
    token_embedding[eval_tokens] = model[eval_tokens]

    results = evaluation.evaluate_similarity(
        args, token_embedding, context[0], logfile=os.path.join(
            args.logdir, 'similarity.tsv'), global_step=global_step)
    if eval_analogy:
        assert not args.no_eval_analogy
        results += evaluation.evaluate_analogy(
            args, token_embedding, context[0], logfile=os.path.join(
                args.logdir, 'analogy.tsv'))

    return results
Code example #9
File: __init__.py Project: ecostadaluz/core
def about():
    """Devolve a pagina about"""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'about'
    ctx_dict['title'] = 'Sobre'
    code = """
        <div class="small-12 large-12 columns">
        <textarea rows="30" readonly>
    """
    code += """
    Sobre o ERP+

    Versão 1.0 de 2015

    O ERP + é uma plataforma de Gestão sobre a qual qualquer pessoa pode desenvolver
    objectos que suportem o seu negócio ou actividade.

    Bom trabalho

    Contactos:

    Dario Costa
    +238 983 04 90

    """
    code += """
        </textarea>
        </div>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
Code example #10
def evaluate(args, embedding, vocab, global_step, eval_analogy=False):
    """Evaluation helper"""
    if 'eval_tokens' not in globals():
        global eval_tokens

        eval_tokens_set = evaluation.get_tokens_in_evaluation_datasets(args)
        if not args.no_eval_analogy:
            eval_tokens_set.update(vocab.idx_to_token)
        eval_tokens = list(eval_tokens_set)

    os.makedirs(args.logdir, exist_ok=True)

    # Compute their word vectors
    context = get_context(args)
    idx_to_token = eval_tokens
    mx.nd.waitall()
    token_embedding = embedding.to_token_embedding(idx_to_token,
                                                   ctx=context[0])

    results = evaluation.evaluate_similarity(args,
                                             token_embedding,
                                             context[0],
                                             logfile=os.path.join(
                                                 args.logdir,
                                                 'similarity.tsv'),
                                             global_step=global_step)
    if eval_analogy:
        assert not args.no_eval_analogy
        results += evaluation.evaluate_analogy(args,
                                               token_embedding,
                                               context[0],
                                               logfile=os.path.join(
                                                   args.logdir, 'analogy.tsv'))

    return results
Code example #11
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)
        resumes = models.Resume.all().fetch(10000)
        context['resumes'] = resumes
        path = os.path.join(os.path.dirname(__file__),
                            'templates/view_resumes.html')
        self.response.out.write(template.render(path, context))
Code example #12
File: main.py Project: breber/cdcsite
    def get(self):
        #mike_exists = models.Account.all().filter('username =', 'mike')
        #if not mike_exists.count():
        #    account = models.Account(username="******", password="******", given_name="Michael", is_admin=True, is_employee=True, ssn='999999999')
        #    account.save()

        context = utils.get_context(self.auth)
        path = os.path.join(os.path.dirname(__file__), 'templates/home.html')
        self.response.out.write(template.render(path, context))
Code example #13
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)

        if context['is_employee']:
            employee = models.Account.all().filter(
                'username =', context['username'])[0]
            context['employee'] = employee

        path = os.path.join(os.path.dirname(__file__), 'templates/profile.html')
        self.response.out.write(template.render(path, context))
Code example #14
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)
        upload_url = blobstore.create_upload_url('/upload')
        upload_url = upload_url.replace('http://localhost:8080',
                                        self.request.get('host'))
        context['upload_url'] = upload_url
        path = os.path.join(os.path.dirname(__file__), 'templates/apply.html')
        self.response.out.write(template.render(path, context))
Code example #15
def Lesk_algorithm(word, sentence_tokens):
    synset = wn.synsets(word)
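    # Default to the first sense, which WordNet orders as the most frequent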
    best_sense = synset[0]
    max_olp = 0
    sentence_context = utils.get_context(
        sentence_tokens)  # extract the context of the sentence

    for sense in synset:
        sense_examples = utils.get_examples(
            sense
        )  # take the example plus the gloss; if there is no example, take only the gloss (this sentence determines the context for that particular sense)
        sense_context = utils.get_context(
            word_tokenize(sense_examples)
        )  # extract the context from the example and the gloss for that particular sense

        olp = max_overlap(sentence_context, sense_context)
        if max_olp < olp:
            max_olp = olp
            best_sense = sense
    return best_sense
Code example #16
File: main.py Project: breber/webcal-proxy
    def get(self):
        context = utils.get_context()
        user = users.get_current_user()
        path = os.path.join(os.path.dirname(__file__), 'templates/welcome.html')
        
        if user:
            calendars = models.Calendar.query(models.Calendar.owner == user.user_id())
            context['records'] = ndb.get_multi(calendars.fetch(keys_only=True))
            path = os.path.join(os.path.dirname(__file__), 'templates/main.html')

        self.response.out.write(template.render(path, context))
Code example #17
def test_get_context():
    inputs = [
        "context",
    ]

    def mock_input(s):
        return inputs.pop(0)

    utils.input = mock_input
    a = utils.get_context()
    if not a == 'context':
        raise AssertionError()
Code example #18
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)

        if context['is_employee']:
            employee = models.Account.all().filter(
                'username =', context['username'])[0]
            context['employee'] = employee

        path = os.path.join(os.path.dirname(__file__),
                            'templates/profile.html')
        self.response.out.write(template.render(path, context))
Code example #19
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)

        if context['is_admin']:
            employee_query = models.Account.all().filter('is_employee =', True)
            employees = employee_query.fetch(1000)
            context['employees'] = employees
            path = os.path.join(os.path.dirname(__file__),
                                'templates/directory.html')
            self.response.out.write(template.render(path, context))

        else:
            path = os.path.join(os.path.dirname(__file__),
                                'templates/error_no_permission.html')
            self.response.out.write(template.render(path, context))
Code example #20
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)

        if context['is_admin']:
            customer_query = models.Account.all().filter('is_customer =', True)
            customers = customer_query.fetch(1000)
            context['customers'] = customers
            path = os.path.join(os.path.dirname(__file__),
                'templates/customers.html')
            self.response.out.write(template.render(path, context))

        else:
            path = os.path.join(os.path.dirname(__file__),
                'templates/error_no_permission.html')
            self.response.out.write(template.render(path, context))
Code example #21
def comparison_task(sess, model=None):
    """Compare the encoder state for two different English sentences
  Cosine similarity is used as the distance metric
  """
    sentences = [
        "There was a man with a red hat", "There was a man with a blue hat",
        "A man with a blue hat was there",
        "The sky is blue and the grass is green",
        "Why are you asking about the history of my life"
    ]
    # Load the vocab
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.from" % FLAGS.from_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)

    # Create model and load parameters.
    if model is None:
        model = create_model(sess, True)

    # Persist the original batch size
    original_batch_size = model.batch_size

    model.batch_size = 1  # We decode one sentence at a time.

    # Get all of the context vectors
    context_vectors = []
    for sentence in sentences:
        context_vector = get_context(sess, model, en_vocab, sentence)
        context_vectors.append(context_vector)

    # Calculate the similarity matrix
    similarity = np.zeros((len(sentences), len(sentences)))
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            similarity[i, j] = cosine_similarity(context_vectors[i],
                                                 context_vectors[j])

    # Display the output
    print(80 * "=")
    print("For the following sentences:\n")
    for i, sentence in enumerate(sentences):
        print(i, sentence)
    print("The similarity matrix is:\n")
    print(similarity, "\n")
    print(80 * "=")

    # Reset the model
    model.batch_size = original_batch_size
Code example #22
File: main.py Project: michaelore/cdc2012
    def get(self):

        #mike_exists = models.Account.all().filter('username =', 'mike')
        #if not mike_exists.count():
        #    account = models.Account(username="******", password="******", given_name="Michael", is_admin=True, is_employee=True, ssn='999999999')
        #    account.save()

        context = utils.get_context(self.request)
        insert_details(context)
        #if 'user' in self.request.GET:
        #    new_user_object = self.auth.store.user_model.get_by_auth_id(self.request.GET['user'])
        #    old_user_object = models.Account.all().filter('username =', self.request.GET['user']).fetch(1)[0].__dict__['_entity']
        #    username = new_user_object.auth_ids[0]

        path = os.path.join(os.path.dirname(__file__), 'templates/home.html')
        self.response.out.write(template.render(path, context))
Code example #23
    def get(self):

        #mike_exists = models.Account.all().filter('username =', 'mike')
        #if not mike_exists.count():
        #    account = models.Account(username="******", password="******", given_name="Michael", is_admin=True, is_employee=True, ssn='999999999')
        #    account.save()

        context = utils.get_context(self.request)
        insert_details(context)
        #if 'user' in self.request.GET:
        #    new_user_object = self.auth.store.user_model.get_by_auth_id(self.request.GET['user'])
        #    old_user_object = models.Account.all().filter('username =', self.request.GET['user']).fetch(1)[0].__dict__['_entity']
        #    username = new_user_object.auth_ids[0]

        path = os.path.join(os.path.dirname(__file__), 'templates/home.html')
        self.response.out.write(template.render(path, context))
Code example #24
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)

        if context['is_admin']:
            customer_query = models.Account.all().filter('is_customer =', True)
            customers = customer_query.fetch(1000)
            context['customers'] = customers
            path = os.path.join(os.path.dirname(__file__),
                                'templates/customers.html')
            self.response.out.write(template.render(path, context))

        else:
            path = os.path.join(os.path.dirname(__file__),
                                'templates/error_no_permission.html')
            self.response.out.write(template.render(path, context))
Code example #25
File: __init__.py Project: ecostadaluz/core
def main():
    """Funçao index"""
    print('Init do main_route')
    window_id = str(get_window_id())
    print(window_id)
    set_base_context(window_id)
    print('oi')
    ctx_dict = get_context(window_id)
    print(ctx_dict)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'index'
    ctx_dict['title'] = 'ERP +'
    ctx_dict['form'] = ''
    print(ctx_dict)
    set_context(window_id, ctx_dict)
    return ctx_dict
Code example #26
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)

        if context['is_admin']:
            employee_query = models.Account.all().filter(
                'is_employee =', True)  #.filter('username !=', 'mike')
            employees = employee_query.fetch(1000)
            context['employees'] = employees
            path = os.path.join(os.path.dirname(__file__),
                                'templates/directory.html')
            self.response.out.write(template.render(path, context))

        else:
            path = os.path.join(os.path.dirname(__file__),
                                'templates/error_no_permission.html')
            self.response.out.write(template.render(path, context))
Code example #27
File: __init__.py Project: ecostadaluz/core
def login_submit():
    """Valida o Login"""
    print('Im on login submit')
    window_id = request.forms.get('window_id')

    # this code deletes the JSON dictionaries that are created over time to store contextual information
    now = time.time()
    path = '/var/www/tmp/'
    for f in os.listdir(path):
        if os.stat(os.path.join(path,f)).st_mtime < now - 86400:
            os.remove(os.path.join(path, f))

    import base64
    from users import Users
    user = request.forms.get('login')
    password = request.forms.get('password')
    print('before db request')
    db_user = Users(where="login = '{user}'".format(user=user)).get()
    autenticated = False
    print('1', user, db_user)
    if db_user:
        db_user = db_user[0]
        if base64.decodestring(db_user['password'].encode('utf-8')).decode('utf-8')[6:] == password:
            print('o utilizador {user} autenticou-se com sucesso!'.format(user=db_user['nome']))
            request.session['user'] = db_user['id']
            request.session['user_name'] = db_user['nome']
            request.session.save()
            autenticated = True
    print('2')
    if not autenticated:
        return HTTPResponse(status=500, output='Autenticação Inválida!!!')
    else:
        #print('authenticated')
        if window_id:
            #print('have window_id')
            ctx_dict = get_context(window_id)
            if 'redirect_url' in ctx_dict:
                #print('have redirect ' + str(ctx_dict['redirect_url']))
                return ctx_dict['redirect_url']
            else:
                return '/'
        else:
            #print('no window_id')
            return '/'
    print('end')
Code example #28
File: parse_albums.py Project: tkell/parse-music
def parse_folder(path, parsers):
    context = get_context(path)

    parser = None
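    # Pick the first parser whose store matcher recognises the album folder name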
    for p in parsers:
        album_string = path.split(os.path.sep)[-1]
        if p.match_store(album_string, source='album'):
            parser = p
            break
    if parser is None:
        # Eventually we'll deal with errors here,
        # allow the user to enter a store manually, etc.
        print("Panic!  No parser found")
        album_info = None
    else:
        album_info = {}
        album_info['artist'] = parser.get_field(path, 'artist', 'album')
        album_info['album_title'] = parser.get_field(path, 'album_title', 'album')
        album_info['label'] = parser.get_field(path, 'label', 'album')

    return path, parser, context, album_info
Code example #29
File: __init__.py Project: ecostadaluz/core
def licence():
    """Devolve a pagina da Licença"""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'licence'
    ctx_dict['title'] = 'Licença'

    with open('/var/www/core/help/licence.txt', 'r', encoding='utf8') as licence_file:
        licence_text = licence_file.read()
    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    code += licence_text
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
Code example #30
    def secure(*args, **kargs):
        #print('start of the secure in verify_form_rights')
        from utils import get_context, set_context
        import objs
        window_id = kargs.get('window_id')
        #print(window_id)
        ctx_dict = get_context(window_id)
        #print('ctx_dict in verify_form_rights', ctx_dict)
        model_name = ctx_dict.get('model_name')
        model = eval("""objs.{model_name}()""".format(model_name=model_name))
        result = verify_rights(model=model, action=target.__name__)
        #print('2')
        if result == True:
            return target(*args, **kargs)
        elif isinstance(result, list):
            ctx_dict['rights'] = result
            set_context(window_id, ctx_dict)
            #print('end of the form secure, about to load the object')
            return target(*args, **kargs)
        else:
            return result
Code example #31
File: main.py Project: michaelore/cdc2012
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)

        # Look up the user attached to the current auth session.
        current_session = auth.get_auth().get_user_by_session()
        new_user_object = self.auth.store.user_model.get_by_auth_token(current_session['user_id'], current_session['token'])[0]
        username = new_user_object.auth_ids[0]
        old_user_object = models.Account.all().filter('username =', username).fetch(1)[0].__dict__['_entity']

        if 'user' in self.request.GET:
            new_user_object = self.auth.store.user_model.get_by_auth_id(self.request.GET['user'])
            old_user_object = models.Account.all().filter('username =', self.request.GET['user']).fetch(1)[0].__dict__['_entity']
            username = new_user_object.auth_ids[0]

        context['current_session'] = current_session
        context['new_user_object'] = new_user_object
        context['user'] = username
        context['old_user_object'] = old_user_object

        path = os.path.join(os.path.dirname(__file__),
            'templates/auth_test.html')
        self.response.out.write(template.render(path, context))
Code example #32
File: __init__.py Project: ecostadaluz/core
def update():
    """Devolve a pagina de Actualizaçao da Implementacao Local"""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'update'
    ctx_dict['title'] = 'Actualização'

    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    code += """
    Actualização

    Por Implementar...

    """
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
Code example #33
# iterate over the frames in the image directory
files = os.listdir(path)
num_img = len(files)
maxconf = np.empty([num_img-1])
conftmp = []

for frame in range(1, num_img):
    # update the scale of the spatial weighting window
    sigma = sigma * scale
    window = hamming_window * np.exp(-0.5 / (sigma * sigma) * dist)
    window = window / window.sum()  # normalize the weight window
    # load image
    img = cv2.imread(os.path.join(path, files[frame - 1]))
    if img.shape[2] > 1:
        im = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        im = img
    sz = [size_x, size_y]
    contextprior = get_context(im, pos, sz, window)
    # from the second frame on, locate the target via the confidence map
    if frame > 1:
        # calculate response of the confidence map at all locations
        confmap = np.fft.ifft2(Hstcf * np.fft.fft2(contextprior))
        confmap = np.real(confmap)
        # target location is at the maximum response
        [row, col] = np.unravel_index(np.argmax(confmap), confmap.shape)
        row = row + 1
        col = col + 1
        pos = [pos[0]-size_x/2+row, pos[1]-size_y/2+col]
        sz = [size_x, size_y]
        print('frame:', frame)
        # print('pos:', pos)
        # print('row:', row)
        # print('col:', col)
Code example #34
def train(args):
    """Training helper."""
    if not args.model.lower() in ['cbow', 'skipgram']:
        logging.error('Unsupported model %s.', args.model)
        sys.exit(1)

    if args.data.lower() == 'toy':
        data = mx.gluon.data.SimpleDataset(nlp.data.Text8(segment='train')[:2])
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'text8':
        data = nlp.data.Text8(segment='train')
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'fil9':
        data = nlp.data.Fil9(max_sentence_length=10000)
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'wiki':
        data, vocab, idx_to_counts = wiki(args.wiki_root, args.wiki_date,
                                          args.wiki_language,
                                          args.max_vocab_size)

    if args.ngram_buckets > 0:
        data, batchify_fn, subword_function = transform_data_fasttext(
            data,
            vocab,
            idx_to_counts,
            cbow=args.model.lower() == 'cbow',
            ngram_buckets=args.ngram_buckets,
            ngrams=args.ngrams,
            batch_size=args.batch_size,
            window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)
    else:
        subword_function = None
        data, batchify_fn = transform_data_word2vec(
            data,
            vocab,
            idx_to_counts,
            cbow=args.model.lower() == 'cbow',
            batch_size=args.batch_size,
            window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)

    num_tokens = float(sum(idx_to_counts))

    model = CBOW if args.model.lower() == 'cbow' else SG
    embedding = model(token_to_idx=vocab.token_to_idx,
                      output_dim=args.emsize,
                      batch_size=args.batch_size,
                      num_negatives=args.negative,
                      negatives_weights=mx.nd.array(idx_to_counts),
                      subword_function=subword_function)
    context = get_context(args)
    embedding.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=True, static_shape=True)

    optimizer_kwargs = dict(learning_rate=args.lr)
    try:
        trainer = mx.gluon.Trainer(embedding.collect_params(), args.optimizer,
                                   optimizer_kwargs)
    except ValueError as e:
        if args.optimizer == 'groupadagrad':
            logging.warning('MXNet <= v1.3 does not contain '
                            'GroupAdaGrad support. Falling back to AdaGrad')
            trainer = mx.gluon.Trainer(embedding.collect_params(), 'adagrad',
                                       optimizer_kwargs)
        else:
            raise e

    try:
        if args.no_prefetch_batch:
            data = data.transform(batchify_fn)
        else:
            from executors import LazyThreadPoolExecutor
            num_cpu = len(os.sched_getaffinity(0))
            ex = LazyThreadPoolExecutor(num_cpu)
    except (ImportError, SyntaxError, AttributeError):
        # Py2 - no async prefetching is supported
        logging.warning(
            'Asynchronous batch prefetching is not supported on Python 2. '
            'Consider upgrading to Python 3 for improved performance.')
        data = data.transform(batchify_fn)

    num_update = 0
    prefetched_iters = []
    for _ in range(min(args.num_prefetch_epoch, args.epochs)):
        prefetched_iters.append(iter(data))
    for epoch in range(args.epochs):
        if epoch + len(prefetched_iters) < args.epochs:
            prefetched_iters.append(iter(data))
        data_iter = prefetched_iters.pop(0)
        try:
            batches = ex.map(batchify_fn, data_iter)
        except NameError:  # Py 2 or batch prefetching disabled
            batches = data_iter

        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        for i, batch in enumerate(batches):
            ctx = context[i % len(context)]
            batch = [array.as_in_context(ctx) for array in batch]
            with mx.autograd.record():
                loss = embedding(*batch)
            loss.backward()

            num_update += loss.shape[0]
            if len(context) == 1 or (i + 1) % len(context) == 0:
                trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean().as_in_context(context[0])
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                # Due to subsampling, the overall number of batches is an upper
                # bound
                if args.model.lower() == 'skipgram':
                    num_batches = (num_tokens * args.window *
                                   2) // args.batch_size
                else:
                    num_batches = num_tokens // args.batch_size
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args,
                 embedding,
                 vocab,
                 num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        embedding.save_parameters(os.path.join(args.logdir,
                                               'embedding.params'))
Code example #35
File: example_crbm.py Project: OwaJawa/morb
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None


# generate data
print ">> Generating dataset..."
data = generate_data(1000) # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data)

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100


print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = { rbm.v: T.matrix('v'), rbm.x: T.matrix('x') }
Code example #36
File: app.py Project: srayhit/happiness_seeker
def webhook():
    data = request.get_json()
    log(data)
    messaging_text = None
    audio_link = None
    audio_resp = None

    if data['object'] == 'page':
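        # A single webhook POST can batch several page entries, each carrying
        # several messaging events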
        for entry in data['entry']:
            for messaging_event in entry['messaging']:

                # IDs
                sender_id = messaging_event['sender']['id']
                recipient_id = messaging_event['recipient']['id']

                if messaging_event.get('message'):
                    print(messaging_event['message'])
                    if 'text' in messaging_event['message']:
                        messaging_text = messaging_event['message']['text']
                    else:
                        # print("No message")
                        messaging_text = None
                    if 'attachments' in messaging_event['message']:
                        # print('attachments')
                        # print(messaging_event['message']['attachments'][0]['type'])
                        if messaging_event['message']['attachments'][0][
                                'type'] == "audio":
                            audio_link = messaging_event['message'][
                                'attachments'][0]['payload']['url']
                            r = requests.get(audio_link, allow_redirects=True)
                            filepath = os.path.join(sys.path[0], 'audio.mp4')
                            open(filepath, 'wb').write(r.content)
                            export_filepath = os.path.join(
                                sys.path[0], 'audio.wav')
                            # print(filepath)
                            # print(export_filepath)
                            try:
                                # track = AudioSegment.from_file("filepath")
                                # track.export(os.path.join(sys.path[0],'audio.wav'))
                                if os.path.exists(export_filepath):
                                    os.remove(export_filepath)
                                subprocess.call([
                                    'ffmpeg', '-i', filepath, export_filepath
                                ])
                                # print("successfully Converted!")
                            except Exception as e:
                                print(e)
                            # print(filepath)
                            # urllib.request.urlretrieve(audio_link,'.\audio.mp4')
                            # print(audio_link)
                            # with open(audio_link,'rb') as f:
                            #     print('inside audio file')
                            audio_resp = ut.get_audio_response(export_filepath)
                            # print(audio_resp)
                            # print(audio_link)
                        else:
                            audio_link = None
                    #ECHO
                    # print(messaging_text)
                    # response = messaging_text
                    if messaging_text is not None or audio_resp is not None:

                        greetings = ut.is_greetings(messaging_text, audio_resp)
                        # print(greetings)
                        if greetings:
                            response = ut.handle_greetings()
                            print(response)
                            bot.send_text_message(sender_id, response)
                        else:
                            emot, emotconf = ut.get_emotion(
                                messaging_text, audio_resp)
                            sent, sentconf = ut.get_sentiment(
                                messaging_text, audio_resp)
                            cont, contconf = ut.get_context(
                                messaging_text, audio_resp)
                            # print(emot,sent,cont)

                            util_resp = ut.handle_response(
                                emot, sent, emotconf, sentconf)
                            action_resp = ut.generate_action(
                                util_resp, cont, contconf)
                            act_list = action_resp
                            share_var = False
                            partner_var = False
                            if 'share' in act_list:
                                share_var = True
                            if 'partner' in act_list:
                                partner_var = True

                            elements = ac.get_element(action_resp)
                            resp_emot = ac.get_emotion_response(util_resp)
                            default_text = "These are my suggestions"
                            response = resp_emot + "\n" + default_text

                            bot.send_text_message(sender_id, response)
                            if elements:
                                bot.send_generic_message(sender_id, elements)

                            if share_var:
                                # print('here')
                                bot.send_text_message(
                                    sender_id,
                                    'You can also share your achievements on your wall!'
                                )
                            if partner_var:
                                # print('here')
                                bot.send_text_message(
                                    sender_id,
                                    'You can also call your partner')

                    # bot.send_text_message(sender_id,response)

    return "ok", 200
Code example #37
    def get(self):
        context = utils.get_context(self.request)
        insert_details(context)
        path = os.path.join(os.path.dirname(__file__), 'templates/thanks.html')
        self.response.out.write(template.render(path, context))
Code example #38
def enforce_max_size(token_embedding, size):
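    """Truncate token_embedding in place so at most the first `size` tokens are kept."""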
    if size and len(token_embedding.idx_to_token) > size:
        token_embedding._idx_to_token = token_embedding._idx_to_token[:size]
        token_embedding._idx_to_vec = token_embedding._idx_to_vec[:size]
        token_embedding._token_to_idx = {
            token: idx
            for idx, token in enumerate(token_embedding._idx_to_token)
        }


if __name__ == '__main__':
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    args_ = get_args()
    ctx = utils.get_context(args_)[0]
    if not os.path.isdir(args_.logdir):
        os.makedirs(args_.logdir)

    # Load pre-trained embeddings
    if not args_.embedding_path:
        if args_.embedding_name.lower() == 'fasttext':
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name,
                source=args_.embedding_source,
                load_ngrams=args_.fasttext_load_ngrams,
                allow_extend=True,
                unknown_autoextend=True)
        else:
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name, source=args_.embedding_source)
Code example #39
def train(args):
    """Training helper."""
    if args.ngram_buckets:  # Fasttext model
        coded_dataset, negatives_sampler, vocab, subword_function, \
            idx_to_subwordidxs = get_train_data(args)
        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            subword_function=subword_function,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    else:
        coded_dataset, negatives_sampler, vocab = get_train_data(args)
        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    embedding_out = nlp.model.train.SimpleEmbeddingModel(
        token_to_idx=vocab.token_to_idx,
        embedding_size=args.emsize,
        weight_initializer=mx.init.Zero(),
        sparse_grad=not args.no_sparse_grad,
    )
    loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    context = get_context(args)
    embedding.initialize(ctx=context)
    embedding_out.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=not args.no_static_alloc)
        embedding_out.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr)
    params = list(embedding.embedding.collect_params().values()) + \
        list(embedding_out.collect_params().values())
    trainer = mx.gluon.Trainer(params, args.optimizer, optimizer_kwargs)

    if args.ngram_buckets:  # Fasttext model
        optimizer_subwords_kwargs = dict(learning_rate=args.lr_subwords)
        params_subwords = list(
            embedding.subword_embedding.collect_params().values())
        trainer_subwords = mx.gluon.Trainer(params_subwords,
                                            args.optimizer_subwords,
                                            optimizer_subwords_kwargs)

    num_update = 0
    for epoch in range(args.epochs):
        random.shuffle(coded_dataset)
        context_sampler = nlp.data.ContextSampler(coded=coded_dataset,
                                                  batch_size=args.batch_size,
                                                  window=args.window)
        num_batches = len(context_sampler)

        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        for i, batch in enumerate(context_sampler):
            progress = (epoch * num_batches + i) / (args.epochs * num_batches)
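            # progress in [0, 1) drives the linear learning-rate decay below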
            (center, word_context, word_context_mask) = batch
            negatives_shape = (word_context.shape[0],
                               word_context.shape[1] * args.negative)
            negatives, negatives_mask = remove_accidental_hits(
                negatives_sampler(negatives_shape), word_context,
                word_context_mask)

            if args.ngram_buckets:  # Fasttext model
                if args.model.lower() == 'skipgram':
                    unique, inverse_unique_indices = np.unique(
                        center.asnumpy(), return_inverse=True)
                    unique = mx.nd.array(unique)
                    inverse_unique_indices = mx.nd.array(
                        inverse_unique_indices, ctx=context[0])
                    subwords, subwords_mask = \
                        indices_to_subwordindices_mask(unique, idx_to_subwordidxs)
                elif args.model.lower() == 'cbow':
                    unique, inverse_unique_indices = np.unique(
                        word_context.asnumpy(), return_inverse=True)
                    unique = mx.nd.array(unique)
                    inverse_unique_indices = mx.nd.array(
                        inverse_unique_indices, ctx=context[0])
                    subwords, subwords_mask = \
                        indices_to_subwordindices_mask(unique, idx_to_subwordidxs)
                else:
                    logging.error('Unsupported model %s.', args.model)
                    sys.exit(1)

            num_update += len(center)

            # To GPU
            center = center.as_in_context(context[0])
            if args.ngram_buckets:  # Fasttext model
                subwords = subwords.as_in_context(context[0])
                subwords_mask = subwords_mask.astype(np.float32).as_in_context(
                    context[0])
            word_context = word_context.as_in_context(context[0])
            word_context_mask = word_context_mask.as_in_context(context[0])
            negatives = negatives.as_in_context(context[0])
            negatives_mask = negatives_mask.as_in_context(context[0])

            with mx.autograd.record():
                # Combine subword level embeddings with word embeddings
                if args.model.lower() == 'skipgram':
                    if args.ngram_buckets:
                        emb_in = embedding(
                            center, subwords, subwordsmask=subwords_mask,
                            words_to_unique_subwords_indices=inverse_unique_indices)
                    else:
                        emb_in = embedding(center)

                    with mx.autograd.pause():
                        word_context_negatives = mx.nd.concat(
                            word_context, negatives, dim=1)
                        word_context_negatives_mask = mx.nd.concat(
                            word_context_mask, negatives_mask, dim=1)

                    emb_out = embedding_out(word_context_negatives,
                                            word_context_negatives_mask)

                    # Compute loss
                    pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                    pred = pred.squeeze() * word_context_negatives_mask
                    label = mx.nd.concat(word_context_mask,
                                         mx.nd.zeros_like(negatives), dim=1)

                elif args.model.lower() == 'cbow':
                    word_context = word_context.reshape((-3, 1))
                    word_context_mask = word_context_mask.reshape((-3, 1))
                    if args.ngram_buckets:
                        emb_in = embedding(word_context, subwords,
                                           word_context_mask, subwords_mask,
                                           inverse_unique_indices)
                    else:
                        emb_in = embedding(word_context, word_context_mask)

                    with mx.autograd.pause():
                        center = center.tile(args.window * 2).reshape((-1, 1))
                        negatives = negatives.reshape((-1, args.negative))

                        center_negatives = mx.nd.concat(
                            center, negatives, dim=1)
                        center_negatives_mask = mx.nd.concat(
                            mx.nd.ones_like(center), negatives_mask, dim=1)

                    emb_out = embedding_out(center_negatives,
                                            center_negatives_mask)

                    # Compute loss
                    pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                    pred = pred.squeeze() * word_context_mask
                    label = mx.nd.concat(
                        mx.nd.ones_like(word_context),
                        mx.nd.zeros_like(negatives), dim=1)

                loss = loss_function(pred, label)

            loss.backward()

            if args.optimizer.lower() != 'adagrad':
                trainer.set_learning_rate(
                    max(0.0001, args.lr * (1 - progress)))

            if (args.optimizer_subwords.lower() != 'adagrad'
                    and args.ngram_buckets):
                trainer_subwords.set_learning_rate(
                    max(0.0001, args.lr_subwords * (1 - progress)))

            trainer.step(batch_size=1)
            if args.ngram_buckets:
                trainer_subwords.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean()
            if (i + 1) % args.log_interval == 0:
                wps = log_wc / (time.time() - log_start_time)
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, embedding, vocab, num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        save_params(args, embedding, embedding_out)
Code example #40
def main(debug_mode=False):
    file_path = Path('commiter.yml')
    debug('file_path', file_path, debug_mode)
    if file_path.is_file():
        with open(str(file_path), 'r') as stream:
            try:
                config = safe_load(stream)
                debug('convention from file', config['convention'], debug_mode)
                if config['convention'] is not None:
                    convention = str(config['convention']).lower()
                else:
                    convention = 'none'

                if convention == 'none':
                    print('You are not using a convention')
                    commit_message = just_message()

                else:
                    print('You are using the %s convention' % convention)
                    tag, msg = get_text()
                    if convention == 'angular' or convention == 'karma':
                        context = get_context()
                        commit_message = angular_convention(tag, msg, context)
                    elif convention == 'changelog':
                        commit_message = changelog_convention(tag, msg)
                    elif convention == 'symphony':
                        commit_message = symphony_convention(tag, msg)

                commit_message += gen_co_author(args.co_author)
                debug('commit message', commit_message, debug_mode)
                system('git commit -m "%s"' % commit_message)

            except YAMLError as exc:
                print(exc)

    elif args.convention != '':
        convention = str(args.convention)
        debug('convention flag', convention, debug_mode)

        if convention == 'message':
            commit_message = just_message()
            create_file('none', args.no_file)

        else:
            tag, msg = get_text()

            if convention == 'angular' or convention == 'karma':
                context = get_context()
                commit_message = angular_convention(tag, msg, context)
                create_file(convention, args.no_file)
            elif convention == 'changelog':
                commit_message = changelog_convention(tag, msg)
                create_file(convention, args.no_file)
            elif convention == 'symphony':
                commit_message = symphony_convention(tag, msg)
                create_file(convention, args.no_file)

        commit_message += gen_co_author(args.co_author)
        debug('commit message', commit_message, debug_mode)
        system('git commit -m "%s"' % commit_message)

    else:
        debug('parser full return', parser.parse_args(), debug_mode)
        parser.print_help()
Code example #41
def train(args):
    """Training helper."""
    if not args.model.lower() in ['cbow', 'skipgram']:
        logging.error('Unsupported model %s.', args.model)
        sys.exit(1)

    if args.data.lower() == 'toy':
        data = mx.gluon.data.SimpleDataset(nlp.data.Text8(segment='train')[:2])
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'text8':
        data = nlp.data.Text8(segment='train')
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'fil9':
        data = nlp.data.Fil9(max_sentence_length=10000)
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'wiki':
        data, vocab, idx_to_counts = wiki(args.wiki_root, args.wiki_date,
                                          args.wiki_language,
                                          args.max_vocab_size)

    if args.ngram_buckets > 0:
        data, batchify_fn, subword_function = transform_data_fasttext(
            data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow',
            ngram_buckets=args.ngram_buckets, ngrams=args.ngrams,
            batch_size=args.batch_size, window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)
    else:
        subword_function = None
        data, batchify_fn = transform_data_word2vec(
            data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow',
            batch_size=args.batch_size, window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)

    num_tokens = float(sum(idx_to_counts))

    model = CBOW if args.model.lower() == 'cbow' else SG
    embedding = model(token_to_idx=vocab.token_to_idx, output_dim=args.emsize,
                      batch_size=args.batch_size, num_negatives=args.negative,
                      negatives_weights=mx.nd.array(idx_to_counts),
                      subword_function=subword_function)
    context = get_context(args)
    embedding.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=True, static_shape=True)

    optimizer_kwargs = dict(learning_rate=args.lr)
    try:
        trainer = mx.gluon.Trainer(embedding.collect_params(), args.optimizer,
                                   optimizer_kwargs)
    except ValueError as e:
        if args.optimizer == 'groupadagrad':
            logging.warning('MXNet <= v1.3 does not contain '
                            'GroupAdaGrad support. Falling back to AdaGrad')
            trainer = mx.gluon.Trainer(embedding.collect_params(), 'adagrad',
                                       optimizer_kwargs)
        else:
            raise e

    try:
        if args.no_prefetch_batch:
            data = data.transform(batchify_fn)
        else:
            from executors import LazyThreadPoolExecutor
            num_cpu = len(os.sched_getaffinity(0))
            ex = LazyThreadPoolExecutor(num_cpu)
    except (ImportError, SyntaxError, AttributeError):
        # Py2 - no async prefetching is supported
        logging.warning(
            'Asynchronous batch prefetching is not supported on Python 2. '
            'Consider upgrading to Python 3 for improved performance.')
        data = data.transform(batchify_fn)

    num_update = 0
    prefetched_iters = []
    for _ in range(min(args.num_prefetch_epoch, args.epochs)):
        prefetched_iters.append(iter(data))
    for epoch in range(args.epochs):
        if epoch + len(prefetched_iters) < args.epochs:
            prefetched_iters.append(iter(data))
        data_iter = prefetched_iters.pop(0)
        try:
            batches = ex.map(batchify_fn, data_iter)
        except NameError:  # Py 2 or batch prefetching disabled
            batches = data_iter

        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        for i, batch in enumerate(batches):
            ctx = context[i % len(context)]
            batch = [array.as_in_context(ctx) for array in batch]
            with mx.autograd.record():
                loss = embedding(*batch)
            loss.backward()

            num_update += loss.shape[0]
            if len(context) == 1 or (i + 1) % len(context) == 0:
                trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean().as_in_context(context[0])
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                # Due to subsampling, the overall number of batches is only
                # an upper bound.
                if args.model.lower() == 'skipgram':
                    num_batches = (num_tokens * args.window * 2) // args.batch_size
                else:
                    num_batches = num_tokens // args.batch_size
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, embedding, vocab, num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        embedding.save_parameters(os.path.join(args.logdir, 'embedding.params'))
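
For reference, this helper can be driven without the original CLI by passing a plain namespace; a minimal sketch, covering only the `args.*` attributes read above (the default values and the `gpu` field consumed by `get_context` are assumptions, not the original script's defaults):

# Hypothetical invocation of the train() helper above; attribute names
# mirror the args.* reads in the function, defaults are assumptions.
from types import SimpleNamespace

args = SimpleNamespace(
    model='skipgram', data='text8', max_vocab_size=None,
    ngram_buckets=2000000, ngrams=[3, 4, 5, 6],
    emsize=300, batch_size=1024, window=5, negative=5,
    frequent_token_subsampling=1e-4, lr=0.1, optimizer='groupadagrad',
    no_hybridize=False, no_prefetch_batch=False, num_prefetch_epoch=3,
    epochs=5, log_interval=500, eval_interval=0, no_eval_analogy=True,
    logdir='logs', gpu=None)  # assumed: get_context(args) reads args.gpu

train(args)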
Code example #42
File: example_thirdorder.py Project: wishvivek/morb
import numpy as np
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data, N=1)  # keep the number of dimensions low

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 20

print ">> Constructing RBM..."
numpy_rng = np.random.RandomState(123)
initial_W = np.asarray(np.random.uniform(
    low=-4 * np.sqrt(6. / (n_hidden + n_visible + n_context)),
    high=4 * np.sqrt(6. / (n_hidden + n_visible + n_context)),
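
The example above is cut off mid-call; as a self-contained sketch of the same Glorot-style uniform initialization (the `size` argument is an assumption, since the original truncates before it):

import numpy as np

def glorot_uniform(n_hidden, n_visible, n_context, size, rng=None):
    # Same 4*sqrt(6/fan) bound as the truncated initial_W above.
    rng = rng or np.random.RandomState(123)
    bound = 4 * np.sqrt(6. / (n_hidden + n_visible + n_context))
    return np.asarray(rng.uniform(low=-bound, high=bound, size=size),
                      dtype=np.float32)

# e.g. W = glorot_uniform(n_hidden, n_visible, n_context,
#                         size=(n_visible, n_hidden))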
Code example #43
File: example_crbm.py Project: wishvivek/morb
import matplotlib.pyplot as plt
import theano.tensor as T
plt.ion()

from morb import rbms
from utils import generate_data, get_context

# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data)

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = {rbm.v: T.matrix('v'), rbm.x: T.matrix('x')}

# try to calculate weight updates using CD-1 stats
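
The snippet stops before the CD-1 statistics are actually computed, and morb's trainer API is not shown. As a plain-NumPy illustration of what CD-1 statistics look like for a binary-binary RBM (ignoring the context units for brevity; this is not morb code):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_statistics(v0, W, bh, bv, rng):
    # Positive phase: hidden activations given the data.
    h0_prob = sigmoid(v0 @ W + bh)
    h0 = (rng.uniform(size=h0_prob.shape) < h0_prob).astype(v0.dtype)
    # Negative phase: one Gibbs step down to the visibles and back up.
    v1_prob = sigmoid(h0 @ W.T + bv)
    h1_prob = sigmoid(v1_prob @ W + bh)
    # CD-1 gradient estimates, to be scaled by a learning rate.
    dW = v0.T @ h0_prob - v1_prob.T @ h1_prob
    dbh = (h0_prob - h1_prob).sum(axis=0)
    dbv = (v0 - v1_prob).sum(axis=0)
    return dW, dbh, dbv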
Code example #44
File: main.py Project: breber/cdcsite
    def get(self):
        context = utils.get_context(self.auth)
        path = os.path.join(os.path.dirname(__file__), 'templates/thanks.html')
        self.response.out.write(template.render(path, context))
Code example #45
import numpy as np
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None


# generate data
print ">> Generating dataset..."
data = generate_data(1000) # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data, N=1) # keep the number of dimensions low

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 20
n_factors = 50

print ">> Constructing RBM..."
numpy_rng = np.random.RandomState(123)

def initial_W(n, f):
Code example #46
def train(args):
    """Training helper."""
    vocab, row, col, counts = get_train_data(args)
    model = GloVe(token_to_idx=vocab.token_to_idx, output_dim=args.emsize,
                  dropout=args.dropout, x_max=args.x_max, alpha=args.alpha,
                  weight_initializer=mx.init.Uniform(scale=1 / args.emsize))
    context = get_context(args)
    model.initialize(ctx=context)
    if not args.no_hybridize:
        model.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr, eps=args.adagrad_eps)
    params = list(model.collect_params().values())
    try:
        trainer = mx.gluon.Trainer(params, 'groupadagrad', optimizer_kwargs)
    except ValueError:
        logging.warning('MXNet <= v1.3 does not contain '
                        'GroupAdaGrad support. Falling back to AdaGrad')
        trainer = mx.gluon.Trainer(params, 'adagrad', optimizer_kwargs)

    index_dtype = 'int32'
    if counts.shape[0] >= np.iinfo(np.int32).max:
        index_dtype = 'int64'
        logging.info('Co-occurrence matrix is large. '
                     'Using int64 to represent sample indices.')
    indices = mx.nd.arange(counts.shape[0], dtype=index_dtype)
    for epoch in range(args.epochs):
        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        mx.nd.shuffle(indices, indices)  # inplace shuffle
        bs = args.batch_size
        num_batches = indices.shape[0] // bs
        for i in range(num_batches):
            batch_indices = indices[bs * i:bs * (i + 1)]
            ctx = context[i % len(context)]
            batch_row = row[batch_indices].as_in_context(ctx)
            batch_col = col[batch_indices].as_in_context(ctx)
            batch_counts = counts[batch_indices].as_in_context(ctx)
            with mx.autograd.record():
                loss = model(batch_row, batch_col, batch_counts)
            loss.backward()

            if len(context) == 1 or (i + 1) % len(context) == 0:
                trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean().as_in_context(context[0])
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_dict = dict(
                    global_step=epoch * len(indices) + i * args.batch_size,
                    epoch=epoch, batch=i + 1, loss=log_avg_loss,
                    wps=wps / 1000)
                log(args, log_dict)

                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, model, vocab, i + num_batches * epoch)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, model, vocab, num_batches * args.epochs,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        model.save_parameters(os.path.join(args.logdir, 'glove.params'))
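
The `x_max` and `alpha` arguments passed to `GloVe` above correspond to the weighting function from the GloVe paper; a minimal sketch for reference (the model class itself is assumed to apply it internally):

import mxnet as mx

def glove_weight(counts, x_max=100.0, alpha=0.75):
    # f(x) = (x / x_max)^alpha for x < x_max, else 1.
    return mx.nd.minimum((counts / x_max) ** alpha, 1.0)

# The per-pair loss is then roughly
#   f(X_ij) * (w_i . w_j + b_i + b_j - log X_ij)^2
# summed over the nonzero co-occurrence counts X_ij.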
Code example #47
def get_model(args):
    """Load the pretrained model."""
    context = utils.get_context(args)

    assert '.bin' in args.path  # Assume binary fasttext format

    gensim_fasttext = gensim.models.FastText()
    gensim_fasttext.file_name = args.path
    with open(args.path, 'rb') as f:
        gensim_fasttext._load_model_params(f)
        gensim_fasttext._load_dict(f)

        if gensim_fasttext.new_format:
            # quant input
            gensim_fasttext.struct_unpack(f, '@?')
        num_vectors, dim = gensim_fasttext.struct_unpack(f, '@2q')
        assert gensim_fasttext.wv.vector_size == dim
        dtype = np.float32 if struct.calcsize('@f') == 4 else np.float64
        matrix = np.fromfile(f, dtype=dtype, count=num_vectors * dim)
        matrix = matrix.reshape((-1, dim))

        num_words = len(gensim_fasttext.wv.vocab)
        num_subwords = gensim_fasttext.bucket
        assert num_words + num_subwords == num_vectors

    if args.max_vocab_size:
        idx_to_token = list(
            gensim_fasttext.wv.vocab.keys())[:args.max_vocab_size]
        idx_to_vec = mx.nd.array(matrix[:len(idx_to_token)])
        token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}
    else:
        idx_to_token = list(gensim_fasttext.wv.vocab.keys())
        idx_to_vec = mx.nd.array(matrix[:num_words])
        token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}

    if num_subwords:
        subword_function = nlp.vocab.create_subword_function(
            'NGramHashes', num_subwords=num_subwords)

        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=token_to_idx,
            subword_function=subword_function,
            embedding_size=dim,
        )

        embedding.initialize(ctx=context[0])
        embedding.embedding.weight.set_data(idx_to_vec)
        embedding.subword_embedding.embedding.weight.set_data(
            mx.nd.array(matrix[num_words:]))
    else:
        print('Loaded model does not contain subwords.')

        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=token_to_idx,
            embedding_size=dim,
        )

        embedding.initialize(ctx=context[0])
        embedding.embedding.weight.set_data(idx_to_vec)

    return embedding, idx_to_token
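
Once loaded, the word-level (non-subword) vectors can be probed directly; a small nearest-neighbour sketch, assuming `get_model(args)` has run and that the query token exists in the vocabulary ('king' is only an example):

import mxnet as mx

embedding, idx_to_token = get_model(args)
token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}

vecs = embedding.embedding.weight.data()           # (num_words, dim)
vecs = vecs / mx.nd.norm(vecs, axis=1, keepdims=True)

query = vecs[token_to_idx['king']]
scores = mx.nd.dot(vecs, query)
best = scores.argsort(is_ascend=False)[:5].asnumpy().astype(int)
print([idx_to_token[i] for i in best])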
Code example #48
def main():
    doc = DocxTemplate(f'templates/{settings["template_filename"]}.docx')
    context = get_context(settings, current_period_data)
    doc.render(context)
    doc.save(f'output/{context["output_doc_name"]}.docx')
    print_output_information(context)
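
`get_context` itself is not shown here; a hypothetical minimal version, only to indicate the shape of the dict that `doc.render` and the output f-string expect (all keys other than `output_doc_name` are invented):

def get_context(settings, period_data):
    # Hypothetical: build the context dict for DocxTemplate.render().
    return {
        'output_doc_name': f"report-{period_data['period']}",
        'company': settings.get('company_name', ''),
        'rows': period_data.get('rows', []),
    }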
Code example #49
File: main.py Project: breber/cardgames-web
    def get(self):
        context = utils.get_context()
        path = 'welcome.html'

        self.render_template(path, context)
Code example #50
File: train_fasttext.py Project: xiaohr/gluon-nlp
def train(args):
    """Training helper."""
    if args.ngram_buckets:
        data, negatives_sampler, vocab, subword_function, \
            subword_lookup, num_tokens, idx_to_subwordidxs = get_train_data(args)
        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            subword_function=subword_function,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    else:
        data, negatives_sampler, vocab, num_tokens = get_train_data(args)
        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    embedding_out = nlp.model.train.SimpleEmbeddingModel(
        token_to_idx=vocab.token_to_idx,
        embedding_size=args.emsize,
        weight_initializer=mx.init.Zero(),
        sparse_grad=not args.no_sparse_grad,
    )
    loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    context = get_context(args)
    embedding.initialize(ctx=context)
    embedding_out.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=not args.no_static_alloc)
        embedding_out.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr)
    params = list(embedding.collect_params().values()) + \
        list(embedding_out.collect_params().values())
    trainer = mx.gluon.Trainer(params, args.optimizer, optimizer_kwargs)

    def skipgram_batch(data):
        """Create a batch for Skipgram training objective."""
        centers, word_context, word_context_mask = data
        assert len(centers.shape) == 2
        negatives_shape = (len(word_context), 2 * args.window * args.negative)
        negatives, negatives_mask = remove_accidental_hits(
            negatives_sampler(negatives_shape), word_context)
        context_negatives = mx.nd.concat(word_context, negatives, dim=1)
        masks = mx.nd.concat(word_context_mask, negatives_mask, dim=1)
        labels = mx.nd.concat(word_context_mask,
                              mx.nd.zeros_like(negatives),
                              dim=1)
        if not args.ngram_buckets:
            return (centers.as_in_context(context[0]),
                    context_negatives.as_in_context(context[0]),
                    masks.as_in_context(context[0]),
                    labels.as_in_context(context[0]))
        else:
            unique, inverse_unique_indices = np.unique(centers.asnumpy(),
                                                       return_inverse=True)
            inverse_unique_indices = mx.nd.array(inverse_unique_indices,
                                                 ctx=context[0])
            subwords, subwords_mask = subword_lookup.get(unique.astype(int))

            return (centers.as_in_context(context[0]),
                    context_negatives.as_in_context(context[0]),
                    masks.as_in_context(context[0]),
                    labels.as_in_context(context[0]),
                    mx.nd.array(subwords, ctx=context[0]),
                    mx.nd.array(subwords_mask,
                                ctx=context[0]), inverse_unique_indices)

    def cbow_batch(data):
        """Create a batch for CBOW training objective."""
        centers, word_context, word_context_mask = data
        assert len(centers.shape) == 2
        negatives_shape = (len(centers), args.negative)
        negatives, negatives_mask = remove_accidental_hits(
            negatives_sampler(negatives_shape), centers)
        center_negatives = mx.nd.concat(centers, negatives, dim=1)
        center_negatives_mask = mx.nd.concat(mx.nd.ones_like(centers),
                                             negatives_mask,
                                             dim=1)
        labels = mx.nd.concat(mx.nd.ones_like(centers),
                              mx.nd.zeros_like(negatives),
                              dim=1)
        if not args.ngram_buckets:
            return (word_context.as_in_context(context[0]),
                    word_context_mask.as_in_context(context[0]),
                    center_negatives.as_in_context(context[0]),
                    center_negatives_mask.as_in_context(context[0]),
                    labels.as_in_context(context[0]))
        else:
            unique, inverse_unique_indices = np.unique(word_context.asnumpy(),
                                                       return_inverse=True)
            inverse_unique_indices = mx.nd.array(inverse_unique_indices,
                                                 ctx=context[0])
            subwords, subwords_mask = subword_lookup.get(unique.astype(int))
            return (word_context.as_in_context(context[0]),
                    word_context_mask.as_in_context(context[0]),
                    center_negatives.as_in_context(context[0]),
                    center_negatives_mask.as_in_context(context[0]),
                    labels.as_in_context(context[0]),
                    mx.nd.array(subwords, ctx=context[0]),
                    mx.nd.array(subwords_mask,
                                ctx=context[0]), inverse_unique_indices)

    # Helpers for bucketing
    def skipgram_length_fn(data):
        """Return lengths for bucketing."""
        centers, _, _ = data
        lengths = [
            len(idx_to_subwordidxs[i])
            for i in centers.asnumpy().astype(int).flat
        ]
        return lengths

    def cbow_length_fn(data):
        """Return lengths for bucketing."""
        _, word_context, _ = data
        word_context_np = word_context.asnumpy().astype(int)
        lengths = [
            max(len(idx_to_subwordidxs[i]) for i in one_context)
            for one_context in word_context_np
        ]
        return lengths

    def bucketing_batchify_fn(indices, data):
        """Select elements from data batch based on bucket indices."""
        centers, word_context, word_context_mask = data
        return (centers[indices], word_context[indices],
                word_context_mask[indices])

    length_fn = skipgram_length_fn if args.model.lower() == 'skipgram' \
        else cbow_length_fn

    bucketing_split = 16
    batchify = nlp.data.batchify.EmbeddingCenterContextBatchify(
        batch_size=args.batch_size *
        bucketing_split if args.ngram_buckets else args.batch_size,
        window_size=args.window)
    batched_data = data.transform(batchify)

    num_update = 0
    for epoch in range(args.epochs):
        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        batches = itertools.chain.from_iterable(batched_data)

        if args.ngram_buckets:
            # For fastText training, create batches such that subwords used in
            # that batch are of similar length
            batches = BucketingStream(batches, bucketing_split, length_fn,
                                      bucketing_batchify_fn)

        for i, batch in enumerate(batches):
            progress = (epoch * num_tokens + i * args.batch_size) / \
                (args.epochs * num_tokens)

            if args.model.lower() == 'skipgram':
                if args.ngram_buckets:
                    (center, context_negatives, mask, label, subwords,
                     subwords_mask,
                     inverse_unique_indices) = skipgram_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(center,
                                           subwords,
                                           subwordsmask=subwords_mask,
                                           words_to_unique_subwords_indices=
                                           inverse_unique_indices)
                        emb_out = embedding_out(context_negatives, mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred, label, mask) *
                                mask.shape[1] / mask.sum(axis=1))
                else:
                    (center, context_negatives, mask,
                     label) = skipgram_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(center)
                        emb_out = embedding_out(context_negatives, mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred, label, mask) *
                                mask.shape[1] / mask.sum(axis=1))
            elif args.model.lower() == 'cbow':
                if args.ngram_buckets:
                    (word_context, word_context_mask, center_negatives,
                     center_negatives_mask, label, subwords, subwords_mask,
                     inverse_unique_indices) = cbow_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(word_context,
                                           subwords,
                                           wordsmask=word_context_mask,
                                           subwordsmask=subwords_mask,
                                           words_to_unique_subwords_indices=
                                           inverse_unique_indices)
                        emb_in = emb_in.mean(axis=1, keepdims=True)
                        emb_out = embedding_out(
                            center_negatives, wordsmask=center_negatives_mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred.squeeze(), label,
                                              center_negatives_mask) *
                                center_negatives_mask.shape[1] /
                                center_negatives_mask.sum(axis=1))
                else:
                    (word_context, word_context_mask, center_negatives,
                     center_negatives_mask, label) = cbow_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(word_context,
                                           wordsmask=word_context_mask)
                        emb_in = emb_in.mean(axis=1, keepdims=True)
                        emb_out = embedding_out(
                            center_negatives, wordsmask=center_negatives_mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred.squeeze(), label,
                                              center_negatives_mask) *
                                center_negatives_mask.shape[1] /
                                center_negatives_mask.sum(axis=1))
            else:
                logging.error('Unsupported model %s.', args.model)
                sys.exit(1)

            loss.backward()
            num_update += len(label)
            if args.optimizer.lower() != 'adagrad':
                trainer.set_learning_rate(max(0.0001,
                                              args.lr * (1 - progress)))

            trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean()
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                # Due to subsampling, the overall number of batches is an upper bound
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_tokens // args.batch_size,
                                 log_avg_loss, wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args,
                 embedding,
                 vocab,
                 num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        save_parameters(args, embedding, embedding_out)
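
`remove_accidental_hits` is used by both batch helpers above but not shown; presumably it masks out sampled negatives that collide with a true target. A sketch of that behaviour (an assumption, not the original implementation):

import mxnet as mx
import numpy as np

def remove_accidental_hits(negatives, true_samples):
    # negatives: (batch, num_negatives); true_samples: (batch, num_true).
    # Returns the negatives plus a 0/1 mask that zeroes accidental hits.
    negatives_np = negatives.asnumpy().astype(np.int64)
    true_np = true_samples.asnumpy().astype(np.int64)
    mask_np = np.ones_like(negatives_np, dtype=np.float32)
    for r, (neg_row, true_row) in enumerate(zip(negatives_np, true_np)):
        mask_np[r] = ~np.isin(neg_row, true_row)
    return negatives, mx.nd.array(mask_np, ctx=negatives.context)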
Code example #51
def enforce_max_size(token_embedding, size):
    if size and len(token_embedding.idx_to_token) > size:
        token_embedding._idx_to_token = token_embedding._idx_to_token[:size]
        token_embedding._idx_to_vec = token_embedding._idx_to_vec[:size]
        token_embedding._token_to_idx = {
            token: idx
            for idx, token in enumerate(token_embedding._idx_to_token)
        }


if __name__ == '__main__':
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    args_ = get_args()
    ctx = utils.get_context(args_)[0]
    if not os.path.isdir(args_.logdir):
        os.makedirs(args_.logdir)

    # Load pre-trained embeddings
    if not args_.embedding_path:
        if args_.embedding_name.lower() == 'fasttext':
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name,
                source=args_.embedding_source,
                load_ngrams=args_.fasttext_load_ngrams,
                allow_extend=True,
                unknown_autoextend=True)
        else:
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name, source=args_.embedding_source)
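
The listing cuts off inside the `__main__` block; for reference, a small usage sketch of `enforce_max_size` on a pretrained embedding (the `glove.6B.50d` source name is assumed to be available):

import gluonnlp as nlp

emb = nlp.embedding.create('glove', source='glove.6B.50d')  # ~400k tokens
enforce_max_size(emb, 10000)
assert len(emb.idx_to_token) == 10000
assert emb.idx_to_vec.shape[0] == 10000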
Code example #52
    def api_method(self, context=None, *args, **kwargs):
        if requires_context or context is not None:
            context = get_context(context or kwargs, self)
            return method(self, context, *args, **kwargs)
        return method(self, *args, **kwargs)
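
This fragment only makes sense inside a decorator that closes over `method` and `requires_context`; a hedged reconstruction of such an enclosing decorator (the name `with_context` is invented):

import functools

def with_context(requires_context=False):
    def wrapper(method):
        @functools.wraps(method)
        def api_method(self, context=None, *args, **kwargs):
            if requires_context or context is not None:
                context = get_context(context or kwargs, self)
                return method(self, context, *args, **kwargs)
            return method(self, *args, **kwargs)
        return api_method
    return wrapper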
Code example #53
def train(cfg):
    date_today = date.today().strftime("%b-%d-%Y")
    summary_writer = SummaryWriter(cfg.log_dir,
                                   flush_secs=5,
                                   filename_suffix=date_today)
    train_data = mx.gluon.data.vision.MNIST(
        train=True).transform_first(data_xform)
    train_loader = mx.gluon.data.DataLoader(train_data,
                                            shuffle=True,
                                            batch_size=cfg.batch_size)
    image_shape = train_data[0][0].shape

    # No initialization. Custom blocks encapsulate initialization and setting of data.
    net = Glow(image_shape, cfg.K, cfg.L, cfg.affine, cfg.filter_size,
               cfg.temp, cfg.n_bits)
    ctx = get_context(cfg.use_gpu)
    net = set_context(net, ctx)

    trainer = mx.gluon.Trainer(net.collect_params(), 'adam',
                               {'learning_rate': cfg.lr})
    n_samples = len(train_loader)
    update_interval = n_samples // 2  # store the loss with summary writer twice
    loss_buffer = LossBuffer()
    global_step = 1

    for epoch in range(1, cfg.n_epochs + 1):
        for idx, (batch, label) in enumerate(train_loader):
            print(f'Epoch {epoch} - Batch {idx}/{n_samples}', end='\r')

            data = mx.gluon.utils.split_and_load(batch, ctx)
            with mx.autograd.record():
                for X in data:
                    z_list, nll, bpd = net(X)
                    prev_loss = loss_buffer.new_loss(bpd.mean())

            loss_buffer.loss.backward()
            trainer.step(1)

            if prev_loss is not None and global_step % update_interval == 0:
                loss = prev_loss.asscalar()
                summary_writer.add_scalar(tag='bpd',
                                          value=loss,
                                          global_step=global_step)

            global_step += 1

        # Sample from latent space to generate random digit and reverse from latent
        if (epoch % cfg.plot_interval) == 0:
            x_generate = net.reverse()[0]
            x_generate = x_generate.reshape(1, *x_generate.shape)
            x_recon = net.reverse(z_list[-1])[0]
            x_recon = x_recon.reshape(1, *x_recon.shape)
            x_real = data[0][0].reshape(1, *data[0][0].shape)
            minim = -0.5
            maxim = 0.5
            x_generate = x_generate.clip(minim, maxim)
            x_generate += -minim
            x_recon = x_recon.clip(minim, maxim)
            x_recon += -minim
            x_real += -minim

            img = mx.nd.concatenate([x_real, x_generate, x_recon],
                                    axis=0).asnumpy()
            summary_writer.add_image(tag='generations',
                                     image=img,
                                     global_step=global_step)

    summary_writer.close()
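
`LossBuffer` is not shown above; its usage (`new_loss` returning the previous loss, `.loss.backward()` on the current one) suggests it delays the scalar readout by one iteration so `asscalar()` never blocks on the batch still being computed. A sketch under that assumption:

class LossBuffer:
    # Hold the current loss and hand back the previous one.
    def __init__(self):
        self.loss = None

    def new_loss(self, loss):
        prev = self.loss
        self.loss = loss
        return prev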