Example #1
    def process_all(self):
        try:
            print('process_all ... begin')

            extractor = Extractor(self.language)
            transformer = Transformer()
            extractor.connect()
            num_of_products = extractor.get_num_of_products()

            extractor.execute()
            num_of_rows = 10
            rows = extractor.get_next_batch(num_of_rows)
            pipeline = self.loader.create_pipeline()

            while len(rows) > 0:

                for product in rows:
                    print('\n {}'.format(product))

                    tf = transformer.get_tf(product)
                    print('len tf: {}'.format(len(tf)))
                    self.loader.insert_tf(product['id'], tf, pipeline)

                pipeline.execute()
                rows = extractor.get_next_batch(num_of_rows)

            extractor.close()
            self.loader.count_df()
            self.loader.count_tf_idf(num_of_products)

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
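For context on the counting calls above: count_df and count_tf_idf presumably accumulate document frequencies and then tf-idf scores per term. A generic sketch of the tf-idf arithmetic, not the project's Loader code; the log base and the +1 smoothing are assumptions:

import math

def tf_idf(tf, df, num_of_products):
    # term frequency scaled by inverse document frequency over all products
    idf = math.log(num_of_products / (1 + df))  # +1 smoothing is an assumption
    return tf * idf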
Example #2
def run_pipeline(in_path, out_path):
    spacy_transformer = Transformer()
    with open(in_path, 'rb') as in_f:
        x = dill.load(in_f)
    y = spacy_transformer.predict(x)
    with open(out_path, "wb") as out_f:
        dill.dump(y, out_f)
Example #3
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    '''
    src_voc_size = 11
    tgt_voc_size = 11
    train_loader=torch.utils.data.DataLoader(
        MyDataSet(V=11, batch=20),
        num_workers=2,
        batch_size=30,
    )
    valid_loader = torch.utils.data.DataLoader(
        MyDataSet(V=11, batch=20),
        num_workers=2,
        batch_size=30,
    )
    '''
    traindata, valdata, testdata, indexdict, (src_maxlen, trg_maxlen), (src_voc_size, tgt_voc_size) = \
        DataLoader(32).get_data_iterator()
    model = Transformer(src_vocab_size=src_voc_size,
                        src_max_len=src_maxlen,
                        tgt_vocab_size=tgt_voc_size,
                        tgt_max_len=trg_maxlen)

    optimizer = MyOptimizer(model_size=512,
                            factor=1.0,
                            warmup=400,
                            optimizer=optim.Adam(filter(
                                lambda x: x.requires_grad, model.parameters()),
                                                 betas=(0.9, 0.98),
                                                 eps=1e-09))
    train(model, traindata, valdata, optimizer, indexdict, device, 25)
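MyOptimizer(model_size, factor, warmup, optimizer) looks like the Noam warmup wrapper from the Annotated Transformer; a sketch of that learning-rate rule under that assumption, not necessarily this repository's implementation:

def noam_rate(step, model_size=512, factor=1.0, warmup=400):
    # linear warmup for `warmup` steps, then decay proportional to step ** -0.5 (step starts at 1)
    return factor * (model_size ** -0.5) * min(step ** -0.5, step * warmup ** -1.5)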
Example #4
        async def on_offloding_event(source, rpcname, code, resourcename,
                                     params):
            print("Received from: {} app".format(source))
            print("Received RCP name: {}".format(rpcname))
            print("Received the source code: {}".format(code))
            print("Received params: {}".format(params))

            if source == "Android":
                self.params = params

                # Java file already cached in MAMoC Repository
                if path.exists("java_classes/{}.java".format(self.class_name)):
                    result = self.executor.startExecuting(
                        self.class_name, "{}.java".format(self.class_name),
                        params)

                else:
                    # if it is a class, it must start with package keyword
                    if code.strip().split(' ', 1)[0] == "package":
                        code, self.class_name = Transformer(
                            code, resourcename, params).start()
                    else:
                        code, self.class_name = Transformer(
                            code, resourcename, params).start(type="method")

                    with open("java_classes/{}.java".format(self.class_name),
                              "w") as java_file:
                        print("{}".format(code), file=java_file)

                    result = self.executor.startExecuting(
                        self.class_name, "{}.java".format(self.class_name),
                        params)

                print(result)

                if result:  # if building and execution were successful, send back output and duration in seconds
                    output = result[0]
                    duration = result[1]

                    output = self.decode_bytes(output)

                    self.publish('uk.ac.standrews.cs.mamoc.offloadingresult',
                                 output, duration)

                    # register the procedure for the next RPC request
                    try:
                        re = await self.register(
                            self.execute_java,
                            rpcname,
                            options=RegisterOptions(invoke=u'roundrobin'))
                    except ApplicationError as e:
                        print("could not register procedure: {0}".format(e))
                    else:
                        print("{} endpoints registered".format(re))

            elif source == "iOS":
                print("received from iOS app")
            else:
                print("unrecognized source!")
Example #5
	def translate(self,direction):
		raw_image = self.label.pixmap()
		if raw_image is not None:
			transformer = Transformer(self.label_img,"translate")
			self.label_img , pixMap = transformer.translate(direction)
			self.label.setPixmap(pixMap)
		else:
			self.Ui_Dialog = Ui_Dialog()
			self.Ui_Dialog.setupUi(self.Ui_Dialog)
			self.Ui_Dialog.show()
Example #6
	def scale(self,scale_val):
		raw_image = self.label.pixmap()
		if raw_image is not None:
			transformer = Transformer(self.label_img,"scale")
			self.label_img , pixMap = transformer.scale(scale_val)
			self.label.setPixmap(pixMap)
		else:
			self.Ui_Dialog = Ui_Dialog()
			self.Ui_Dialog.setupUi(self.Ui_Dialog)
			self.Ui_Dialog.show()
Example #7
	def rotate(self,degree):
		raw_image = self.label.pixmap()
		if raw_image is not None:
			transformer = Transformer(self.label_img,"rotate")
			self.label_img , pixMap = transformer.rotate(degree)
			self.label.setPixmap(pixMap)
		else:
			self.Ui_Dialog = Ui_Dialog()
			self.Ui_Dialog.setupUi(self.Ui_Dialog)
			self.Ui_Dialog.show()
Example #8
def run_pipeline(in_path, out_path, glove_file):

    with open(in_path, "rb") as in_f:
        x = dill.load(in_f)

    embedder = Transformer()
    y = embedder.predict(x, glove_file)

    with open(out_path, "wb") as out_f:
        dill.dump(y, out_f)
Example #9
def run_pipeline(in_path, out_path):

    logging.info(f"Instantiating transformer and reading {in_path}")
    clean_text_transformer = Transformer()
    with open(in_path, "rb") as in_f:
        x = dill.load(in_f)

    logging.info("Cleaning raw text")
    y = clean_text_transformer.predict(x)

    logging.info("Saving processed text")
    with open(out_path, "wb") as out_f:
        dill.dump(y, out_f)
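These run_pipeline steps communicate through dill-serialized artifacts. A minimal sketch of how one stage's output feeds the next; the file names and payload here are purely illustrative:

import dill

# hypothetical payload: a list of raw documents for the cleaning stage
with open('raw_text.dill', 'wb') as f:
    dill.dump(['First raw document.', 'Second raw document.'], f)

run_pipeline('raw_text.dill', 'clean_text.dill')

with open('clean_text.dill', 'rb') as f:
    cleaned = dill.load(f)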
Example #10
def main():
    print(args)

    data_dir = 'data/'
    TRAIN_X = os.path.join(data_dir, 'train/in.txt')
    TRAIN_Y = os.path.join(data_dir, 'train/out.txt')
    VALID_X = os.path.join(data_dir, 'dev/in.txt')
    VALID_Y = os.path.join(data_dir, 'dev/out.txt')
    EVAL_X = os.path.join(data_dir, 'test/in.txt')
    EVAL_Y = os.path.join(data_dir, 'test/out.txt')

    small_vocab_file = os.path.join(data_dir, 'vocab.json')
    if os.path.exists(small_vocab_file):
        print("Vocab exists!")
        small_vocab = json.load(open(small_vocab_file))
    else:
        small_vocab = build_vocab([TRAIN_X, TRAIN_Y], small_vocab_file, vocab_size=800000)

    max_src_len = 34
    max_tgt_len = 34

    bs = args.batch_size
    n_train = args.n_train
    n_valid = args.n_valid
    n_eval = args.n_eval

    vocab = small_vocab

    train_x = BatchManager(load_data(TRAIN_X, max_src_len, n_train), bs, vocab)
    train_y = BatchManager(load_data(TRAIN_Y, max_tgt_len, n_train), bs, vocab)
    valid_x = BatchManager(load_data(VALID_X, max_src_len, n_valid), bs, vocab)
    valid_y = BatchManager(load_data(VALID_Y, max_tgt_len, n_valid), bs, vocab)
    eval_x = BatchManager(load_data(EVAL_X, max_src_len, n_eval), bs, vocab)
    eval_y = BatchManager(load_data(EVAL_Y, max_tgt_len, n_eval), bs, vocab)
    print("vocab length is: "+ str(len(vocab)))
    model = Transformer(len(vocab), len(vocab), max_src_len, max_tgt_len, 6, 8, 256, 64, 64, 1024, src_tgt_emb_share=True, tgt_prj_emb_share=True).cuda()
    saved_state = {'epoch': 0, 'lr': 0.001}
    if os.path.exists(args.ckpt_file):
        saved_state = torch.load(args.ckpt_file)
        model.load_state_dict(saved_state['state_dict'])
        logging.info('Load model parameters from %s' % args.ckpt_file)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=saved_state['lr'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.3)
    scheduler.step()  # last_epoch=-1, so this first step() is not expected to change the lr

    # myeval(valid_x, valid_y, vocab, model)
    # train(train_x, train_y, valid_x, valid_y, model, optimizer, vocab, scheduler, args.n_epochs, saved_state['epoch'])
    myeval(eval_x, eval_y, vocab, model)
Example #11
 def __init__(self, n_grams=None, ngram_extractor=None):
     self.key_n_grams = "n_grams"
     self.key_self_relation = "Self--Relation"
     self.key_frequncy = "frequnce"
     self.meta_field_name_id = "uid"
     self.meta_field_name_doc_id = "docid"
     self.meta_field_name_gene1 = "gene1"
     self.meta_field_name_gene2 = "gene2"
     self.n_gram_names = n_grams
     self.preprocessor_ngram_extractor = ngram_extractor or NGramExtractor(
         vocabulary=self.n_gram_names)
     self.preprocessor_fragment_extractor = PPIFragementExtractor()
     self.logger = logging.getLogger(__name__)
     Transformer.__init__(self)
Example #12
def main(data_path):
    dataset = PairsDataset(data_path)
    _, vocab = nlp.model.get_model('bert_12_768_12', dataset_name='wiki_cn_cased',
                                   ctx=CTX, pretrained=True, use_pooler=False, use_decoder=False, use_classifier=False)
    assiant = DatasetAssiant(vocab, vocab, MAX_SOURCE_LEN, MAX_TARGET_LEN)
    dataloader = PairsDataLoader(dataset, BATCH_SIZE, assiant)

    # with open(VOCAB_PATH, 'wb') as fw:
    #     pickle.dump(vocab, fw)
    NWORDS = len(vocab)
    print(NWORDS)
    transformer = Transformer(vocab, vocab, EMBED_SIZE, MODEL_DIM,
                              HEAD_NUM, LAYER_NUM, FFN_DIM, DROPOUT, ATT_DROPOUT, FFN_DROPOUT, CTX)
    transformer.initialize(init.Xavier(), ctx=CTX)
    train(transformer, dataloader, LR, N_EPOCHS, vocab, CTX)
Example #13
    def __init__(self, config):

        super(WordModule, self).__init__()

        # <----------- Config ----------->
        self.config = config

        if config.word_module_version in [2, 3, 4]:

            # <----------- Word Level Transformer ----------->
            self.transformer_word = Transformer.Transformer(
                self.config, self.config.n_mha_layers_word,
                self.config.d_model, self.config.n_head_word)

        # <----------- Embedding the words through n FC layers (word level) ----------->
        if self.config.ff_word:
            self.emb_layer_word = nn.ModuleList([
                nn.Linear(self.config.emb_dim, self.config.emb_dim)
                for _ in range(self.config.num_emb_layers_word)
            ])

        if config.word_module_version == 4:

            # <----------- To map each post vector to a scalar ----------->
            self.condense_layer_word = nn.Linear(self.config.d_model, 1)

        # <----------- Dropout for regularization ----------->
        self.dropout = nn.Dropout(p=self.config.dropout_rate, inplace=True)

        # <----------- Initialization of weights ----------->
        if self.config.ff_word:
            self.emb_layer_word.apply(WordModule.init_weights)

        if config.word_module_version == 4:
            self.condense_layer_word.apply(WordModule.init_weights)
Example #14
    def __init__(self,
                 KittiDir,
                 phase,
                 high_gpu=True):
        self.phase = phase
        self.high_gpu = high_gpu

        if self.phase not in ['train', 'test', 'val']:
            raise ValueError("Panic::Invalid phase parameter")

        transformer = Transformer(self.phase)
        self.dataset = KittiDataset(KittiDir,
                                    phase,
                                    transformer.get_transform())
Example #15
def main(epochs, enable_function, buffer_size, batch_size, d_model, dff,
         num_heads, src_vocab_file, tgt_vocab_file, dataset_path, dropout_rate,
         num_layers, sequence_length, ckpt_path, max_ckpt_keep):

    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'

    train_dataset, test_dataset, src_tokenizer, tgt_tokenizer = utils.load_dataset(
        dataset_path, sequence_length, src_vocab_file, tgt_vocab_file,
        batch_size, buffer_size)
    input_vocab_size = src_tokenizer.vocab_size + 2
    target_vocab_size = tgt_tokenizer.vocab_size + 2
    transformer = Transformer(num_layers, d_model, num_heads, dff,
                              input_vocab_size, target_vocab_size,
                              dropout_rate)

    print('create training object')
    train_obj = Train(epochs, enable_function, transformer, src_tokenizer,
                      tgt_tokenizer, batch_size, train_log_dir, test_log_dir,
                      max_ckpt_keep, ckpt_path, d_model)

    train_obj.training_loop(train_dataset, test_dataset)
    train_obj.load_ckpt()
    input_sentence = 'he go to school'
    train_obj.predict(input_sentence)
    tf.saved_model.save(train_obj.transformer, 'model')
Example #16
def bfs(question, rules, goal):
    cnt = 0
    queue = [QueueElem(question, [])]

    while len(queue) > 0:
        temp = queue[0]
        graph = temp.graph
        path = temp.path

        if len(Maper.getMaps(graph, goal)) != 0:
            print("Solved this question by visiting %d states!" % cnt)
            return (graph, path)

        for rule in rules:
            maps = Maper.getMaps(graph, rule)
            for map in maps:
                ng = Transformer.transform(rule.transRules, graph, map)
                np = list(path)
                np.append(rules.index(rule))
                queue.append(QueueElem(ng, np))

        queue.remove(temp)
        cnt += 1

    return []
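One note on the loop above: taking queue[0] and later calling queue.remove(temp) makes each dequeue O(n). A behavior-equivalent sketch using collections.deque, assuming the same QueueElem, Maper, and Transformer helpers are in scope:

from collections import deque

def bfs(question, rules, goal):
    cnt = 0
    queue = deque([QueueElem(question, [])])
    while queue:
        temp = queue.popleft()  # O(1) dequeue instead of list.remove()
        if len(Maper.getMaps(temp.graph, goal)) != 0:
            print("Solved this question by visiting %d states!" % cnt)
            return (temp.graph, temp.path)
        for rule in rules:
            for map in Maper.getMaps(temp.graph, rule):
                ng = Transformer.transform(rule.transRules, temp.graph, map)
                queue.append(QueueElem(ng, temp.path + [rules.index(rule)]))
        cnt += 1
    return []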
Example #17
 def build_transformers(self):
     # only needs to happen once if the motors don't move w.r.t. the body frame
     # the body frame is the device's reference to the world (maybe becomes adaptive someday)
     self.autobots = [
         Transformer(self.refActual[0],
                     self.refActual[1],
                     self.refActual[2],
                     'Body',
                     translation=self.poseActual)
     ]
     for i in range(self.nMotors):
         self.autobots.append(
             Transformer(self.init_m_ref[i, 0],
                         self.init_m_ref[i, 1],
                         self.init_m_ref[i, 2],
                         f'Motor{i}',
                         translation=self.init_m_pose[i],
                         parent=self.autobots[0]))
Example #18
def make_model(src_vocab, tgt_vocab, N=6,
               d_model=512, d_ff=2048, h=8, dropout=0.1):
    c = copy.deepcopy
    attn = MultiHeadAttention.MultiHeadAttention(h, d_model)
    ff = PositionwiseFeedForward.PositionwiseFeedForward(
        d_model, d_ff, dropout)
    position = PositionEmbedding(d_model, dropout)
    model = Transformer(
        Encoder(EncoderLayer.EncoderLayer(
            d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer.DecoderLayer(d_model, c(attn), c(attn),
                                          c(ff), dropout), N),
        nn.Sequential(Embedding(d_model, src_vocab), c(position)),
        nn.Sequential(Embedding(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place variant; plain xavier_uniform is deprecated
    return model
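A quick usage sketch for make_model; the vocabulary sizes and the reduced layer count are illustrative, and the surrounding imports (torch, nn, and the layer modules) are assumed to be in scope:

model = make_model(src_vocab=11, tgt_vocab=11, N=2)
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('trainable parameters:', n_params)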
Example #19
    def __init__(self,
                 input_shape=(100, 500),
                 c_dim=32,
                 gpu=0,
                 resampling=500,
                 model='train',
                 encoder_name='vae',
                 label_classifier_name='lstm'):
        '''

        :param c_dim: size of the encoder output; also used by the prediction and adversarial networks
        '''
        # networks to build: 1. encoder, 2. label classifier, 3. adversarial (domain) discriminator
        super(DAN, self).__init__()
        self.model = model
        self.input_shape = input_shape
        self.gpu = gpu  # whether a GPU is specified
        self.resampling = resampling
        self.dim = c_dim
        self.encoder_name = encoder_name
        self.label_classifier_name = label_classifier_name
        if encoder_name == 'vae':  # use a VAE as the encoder
            self.encoder = VAE(
                input_shape,
                c_dim=c_dim)  # note the training scheme: the previous and the next encodings are combined by cross-summation

        else:
            self.encoder = CNNEncoder(input_shape, c_dim=c_dim)

        if label_classifier_name == 'lstm':
            self.label_classifier = nn.LSTM(
                input_size=c_dim,
                hidden_size=64,  # rnn hidden unit
                num_layers=1,  # number of rnn layer
                batch_first=True,
                bidirectional=True
                # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
            )
            self.tanh1 = nn.Tanh()
            self.w = nn.Parameter(torch.zeros(64 * 2))
            self.tanh2 = nn.Tanh()
            self.fc1 = nn.Linear(64 * 2, 64)
            self.label_fc = nn.Linear(64, 2)
        else:  # use a Transformer model
            self.label_classifier = Transformer()
            # self.label_fc = nn.Linear(768, 2)

        self.domain_classifier = nn.LSTM(
            input_size=c_dim,
            hidden_size=64,  # rnn hidden unit
            num_layers=1,  # number of rnn layer
            batch_first=True
            # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
        )
        self.domain_fc = nn.Linear(64, 2)  # predict whether the samples come from the same patient
Example #20
def c_02():
    '''Transform a JSON product into a list of strings. Takes a product as a
    JSON dict and returns a list of strings, one per token. The attributes
    used are: by, name, description.'''

    try:
        j_es = {
            'id': 2,
            'by': 'khazana',
            'language_code': 'es',
            'master_id': 2,
            'name': 'Khazana arroz Basmati',
            'description': '10 libras 4,53 kg. Arroz Basmati premium extra largo. El tesoro.'
        }

        j = {
            'id': 2,
            'by': 'khazana',
            'language_code': 'en',
            'name': 'Khazana Basmati rice',
            'description': '10 lb 4.53 kg. Extra long premium Basmati Rice. The Treasure.'
        }

        t = Transformer()
        tokens = t.get_tokens(j)

        print('\n case 02. Tokens: {} \n'.format(tokens))

    except Exception as e:
        print('c_02(), error: {}'.format(e))
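As a rough stand-in for the contract the docstring describes (the real Transformer.get_tokens almost certainly does more, e.g. language-aware normalization and stop-word handling), the idea is roughly:

def get_tokens_sketch(product):
    # concatenate the by/name/description attributes and split into tokens
    text = ' '.join(str(product.get(k, '')) for k in ('by', 'name', 'description'))
    return text.lower().split()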
Example #21
def main():

    print('loading dataset...')
    dataset, tokenizer = create_datasets()
    dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(
        tf.data.experimental.AUTOTUNE)
    tokenizer.save_to_file('tokenizer')
    print('creating model...')
    model = Transformer(tokenizer.vocab_size + 2, d_model=D_MODEL)
    print('training model...')
    learning_rate = CustomSchedule(D_MODEL)
    optimizer = tf.keras.optimizers.Adam(learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    log = tf.summary.create_file_writer('checkpoints')
    avg_loss = tf.keras.metrics.Mean(name='loss', dtype=tf.float32)
    for epoch in range(EPOCHS):
        for (inputs, outputs) in dataset:
            with tf.GradientTape() as tape:
                pred = model([inputs['inputs'], inputs['dec_inputs']])
                loss = loss_function(outputs['outputs'], pred)
                avg_loss.update_state(loss)
            # write log
            if tf.equal(optimizer.iterations % 100, 0):
                with log.as_default():
                    tf.summary.scalar('loss',
                                      avg_loss.result(),
                                      step=optimizer.iterations)
                    tf.summary.scalar('accuracy',
                                      accuracy(outputs['outputs'], pred),
                                      step=optimizer.iterations)
                print('Step #%d Loss: %.6f' %
                      (optimizer.iterations, avg_loss.result()))
                avg_loss.reset_states()
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # save model once every epoch
        model.save('checkpoints/transformer_%d.h5' % optimizer.iterations)
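CustomSchedule(D_MODEL) above is presumably the warmup learning-rate schedule from the TensorFlow Transformer tutorial; a sketch under that assumption, with the warmup_steps default purely illustrative:

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)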
Example #22
def main():
    # if not os.path.exists(args.ckpt_file):
    #     raise FileNotFoundError("model file not found")

    data_dir = '/home/tiankeke/workspace/datas/sumdata/'
    TRAIN_X = os.path.join(data_dir, 'train/train.article.txt')
    TRAIN_Y = os.path.join(data_dir, 'train/train.title.txt')
    TEST_X = args.input_file

    small_vocab_file = 'sumdata/small_vocab.json'
    if os.path.exists(small_vocab_file):
        small_vocab = json.load(open(small_vocab_file))
    else:
        small_vocab = build_vocab([TRAIN_X, TRAIN_Y],
                                  small_vocab_file,
                                  vocab_size=80000)

    max_src_len = 100
    max_tgt_len = 40
    vocab = small_vocab

    test_x = BatchManager(load_data(TEST_X, max_src_len, args.n_test),
                          args.batch_size, small_vocab)

    model = Transformer(len(vocab),
                        len(vocab),
                        200,
                        200,
                        2,
                        4,
                        256,
                        1024,
                        src_tgt_emb_share=True,
                        tgt_prj_wt_share=True).cuda()

    saved_state = torch.load(args.ckpt_file)
    model.load_state_dict(saved_state['state_dict'])
    print('Load model parameters from %s' % args.ckpt_file)

    my_test(test_x, model, small_vocab)
Example #23
 def build_transformers(self):
     # the base joint is defined as the world origin
     self.autobots = []
     prev_tf = None
     for i, joint in enumerate(self.init_joint_poses):
         trns = Transformer(0.0,
                            0.0,
                            np.radians(self.poseActual[i]) * self.spin[i],
                            'temp',
                            protocol=['psi'])
         translationActual = trns.transform(joint[:3], inverse=True)
         tf = Transformer(joint[3],
                          joint[4],
                          joint[5] +
                          np.radians(self.poseActual[i]) * self.spin[i],
                          f'Joint{i+1}',
                          translation=translationActual,
                          parent=prev_tf)
         if prev_tf is not None:
             self.autobots[-1].child = tf
         self.autobots.append(tf)
         prev_tf = tf
Example #24
    def process_all(self):
        '''This method Extracts all the rows SELECTed from the table (MySQL product_translation),
        Transforms them, and Loads them to Redis. This is used for Kill and Fill.'''

        try:
            extractor = Extractor()
            #transformer= Transformer( self.params )
            extractor.connect()
            num_of_rows = 20
            extractor.execute()
            rows = extractor.get_next_batch(num_of_rows)
            transformer = Transformer()
            courier = CourierClicoh()
            loader = LoaderCsv()

            while len(rows) > 0:
                products = []
                for row in rows:
                    print('id : {}'.format(row['id']))
                    j = courier.add_product(row)

                    product = transformer.get_csv_row(j, row)
                    '''d = {
                         'id'                   : row[ 'id'                 ], 
                         'sku'                  : row[ 'sku'                ], 
                         'clicoh_id'            : "row[ 'clicoh_id'          ]", 
                         'clicoh_variant_id'    : "row[ 'clicoh_variant_id'  ]",
                    }'''
                    products.append(product)

                loader.write_rows(products)
                rows = extractor.get_next_batch(num_of_rows)

            extractor.close()
            print('\n ETL.process_all() ... end')

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
            raise
Example #25
def c_03():
    ''' Transformer.get_tf()
    '''

    try:
        j = {
            'id': 2,
            'by': 'khazana',
            'language_code': 'en',
            'name': 'Khazana Basmati rice',
            'description': '10 lb 4.53 kg. Extra long premium Basmati Rice. The Treasure.'
        }

        t = Transformer()
        d = t.get_tf(j)

        print('\n case 03. tf: \n {} \n'.format(json.dumps(d, indent=3)))
    except Exception as e:
        print('c_03(), error: {}'.format(e))
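get_tf presumably returns a per-product term-frequency mapping built from those tokens; a generic sketch of that idea (the normalization choice is an assumption, not necessarily what this Transformer does):

from collections import Counter

def term_frequencies(tokens):
    counts = Counter(tokens)
    total = sum(counts.values())
    return {term: n / total for term, n in counts.items()}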
Example #26
    def __init__(self, config, NN, hh):
        super(DTE, self).__init__()
        self.config = config
        self.input_size = config.word_embedding_size
        if self.config.if_pos:
            self.pos_embed = nn.Embedding(config.pos_tag_kinds, config.pos_embedding_size)
            self.input_size += config.pos_embedding_size
        if self.config.if_char:
            self.char_embed = CharEmbed(config.char_kinds, config.char_embedding_size)
            self.input_size += (2 * config.char_embedding_size)
        self.word_embed = nn.Embedding(config.word_kinds, config.word_embedding_size)
        self.input_dropout = nn.Dropout(config.dropout)

        if self.config.if_transformer:
            self.transformer = Transformer(d_model=self.input_size, N=NN, h=hh,
                                           dropout=0.1, bidirectional=self.config.if_bidirectional)
Example #27
	def __init__(self, config):

		super(PostModule, self).__init__()

		# <----------- Config ----------->
		self.config = config
		
		# <----------- Key and val structure encoder ----------->
		if self.config.include_key_structure:
			self.key_structure_encoder = LearnedPositionEncoder.LearnedPositionEncoder(self.config, self.config.n_head)

		if self.config.include_val_structure:
			self.val_structure_encoder = LearnedPositionEncoder.LearnedPositionEncoder(self.config, self.config.n_head)

		# <----------- Getting a transformer for each level (word & post level) ----------->
		self.transformer_post = Transformer.Transformer(self.config, self.config.n_mha_layers, self.config.d_model, self.config.n_head)
		
		# <----------- Embedding the posts through n FC layers (post level) ----------->
		if self.config.ff_post:
			self.emb_layer_post = nn.ModuleList([nn.Linear(self.config.emb_dim, self.config.emb_dim) for _ in range(self.config.num_emb_layers)])

		# <----------- Fine Tuning layer after getting only the first post's embedding ----------->
		self.fine_tune_layer = nn.Linear(self.config.emb_dim, self.config.emb_dim)

		# <----------- Final layer to predict the output class (4 classes) (To map emb to classes) -----------> 
		self.final_layer_emb = nn.Sequential(nn.Linear(self.config.emb_dim, self.config.num_classes),
											 nn.LogSoftmax(dim = 1))

		# <----------- To map each post vector to a scalar ----------->
		self.condense_layer_post = nn.Linear(self.config.d_model, 1)

		self.final_layer_posts = nn.Sequential(nn.Linear(self.config.max_tweets, self.config.num_classes),
												nn.LogSoftmax(dim = 1))

		# <----------- Dropout for regularization ----------->
		self.dropout = nn.Dropout(p = self.config.dropout_rate, inplace = True)

		# <----------- Initialization of weights ----------->
		if self.config.ff_post:
			self.emb_layer_post.apply(PostModule.init_weights)

		self.fine_tune_layer.apply(PostModule.init_weights)
		self.final_layer_emb.apply(PostModule.init_weights)
		
		self.condense_layer_post.apply(PostModule.init_weights)

		self.final_layer_posts.apply(PostModule.init_weights)
Example #28
    def __init__(self, config):
        super(TOICNN, self).__init__()
        self.config = config
        self.outfile = None

        self.input_size = config.word_embedding_size
        if self.config.if_pos:
            self.pos_embed = nn.Embedding(self.config.pos_tag_kinds,
                                          self.config.pos_embedding_size)
            self.input_size += self.config.pos_embedding_size
        if self.config.if_char:
            self.char_embed = CharEmbed(self.config.char_kinds,
                                        self.config.char_embedding_size)
            self.input_size += (2 * self.config.char_embedding_size)
        self.word_embed = nn.Embedding(self.config.word_kinds,
                                       self.config.word_embedding_size)
        self.input_dropout = nn.Dropout(self.config.dropout)

        if self.config.if_transformer:
            self.transformer = Transformer(
                d_model=self.input_size,
                N=self.config.N,
                h=self.config.h,
                dropout=0.1,
                bidirectional=self.config.if_bidirectional)

        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=self.config.feature_maps_size,
                      kernel_size=(self.config.kernel_size, self.input_size),
                      padding=(self.config.kernel_size // 2, 0)), nn.ReLU())

        self.hat = TOI_Pooling(self.config.feature_maps_size,
                               self.config.if_gpu)
        self.flatten_feature = self.config.feature_maps_size * 3

        self.cls_fcs = nn.Sequential(
            nn.Linear(self.flatten_feature, self.flatten_feature),
            nn.Dropout(self.config.dropout), nn.ReLU(),
            nn.Linear(self.flatten_feature, self.config.label_kinds))

        self.cls_ce_loss = nn.CrossEntropyLoss()
Example #29
def main():
    vocab, max_src_len, max_tgt_len, inputs, targets = load_data('vocab.json',
                                                                 n_data=850)
    inputs, targets = shuffle(inputs, targets)

    # set d_model = d_word_vec
    model = Transformer(n_src_vocab=len(vocab),
                        n_tgt_vocab=len(vocab),
                        max_src_len=max_src_len,
                        max_tgt_len=max_tgt_len,
                        d_word_vec=32,
                        N=6,
                        n_head=4,
                        d_q=16,
                        d_k=16,
                        d_v=16,
                        d_model=32,
                        d_inner=64)
    model.cuda()
    # model = Encoder(len(vocab), max_src_len, d_src_emb=32, N=3, n_head=4,
    #                 d_q=16, d_k=16, d_v=16, d_model=32, d_inner=32)

    model_file = 'models/params_transformer.pkl'
    if os.path.exists(model_file):
        print("Loading parameters from %s" % model_file)
        model.load_state_dict(torch.load(model_file))

    train_idx = int(len(inputs) * 0.90)
    valid_idx = int(len(inputs) * 0.95)

    train_x = BatchManager(inputs[:train_idx], 32)
    train_y = BatchManager(targets[:train_idx], 32)

    valid_x = BatchManager(inputs[train_idx:valid_idx], 64)
    valid_y = BatchManager(targets[train_idx:valid_idx], 64)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = Adam(parameters, lr=0.0001)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=1)

    # train(train_x, train_y, valid_x, valid_y, model, optimizer, n_epochs=100, scheduler=scheduler)
    eval(model, vocab, inputs[train_idx:], targets[train_idx:], out_len=12)
Example #30
    def __init__(self, config):

        super(TransformerBaseline, self).__init__()

        self.config = config

        # <----------- Getting a transformer (post level) ----------->
        self.transformer_post = Transformer.Transformer(
            self.config, self.config.n_mha_layers, self.config.d_model,
            self.config.n_head)

        # <----------- Embedding the key, val and query ----------->
        self.emb_layer_query = nn.ModuleList([
            nn.Linear(self.config.emb_dim, self.config.emb_dim)
            for _ in range(self.config.num_emb_layers)
        ])
        self.emb_layer_val = nn.ModuleList([
            nn.Linear(self.config.emb_dim, self.config.emb_dim)
            for _ in range(self.config.num_emb_layers)
        ])
        self.emb_layer_key = nn.ModuleList([
            nn.Linear(self.config.emb_dim, self.config.emb_dim)
            for _ in range(self.config.num_emb_layers)
        ])

        # <----------- Layer Normalization ----------->
        self.layer_norm = nn.LayerNorm(normalized_shape=self.config.emb_dim)

        # <----------- Final layer to predict the output class (4 classes) ----------->
        self.final_layer = nn.Sequential(
            nn.Linear(self.config.d_model, self.config.num_classes),
            nn.LogSoftmax(dim=1))

        # <----------- Initialization of weights ----------->
        self.emb_layer_query.apply(Transformer.init_weights)
        self.emb_layer_val.apply(Transformer.init_weights)
        self.emb_layer_key.apply(Transformer.init_weights)
        self.final_layer.apply(Transformer.init_weights)
Example #31
    try:
        from Reporter import Reporter
        r = Reporter()
        if not r.load():
            raise Exception("could not load document")
        
        if r.appendNewEntry():
            r.checkAlarms()
            if r.save():
                successfully = True
                if not Config.RSS_AND_HTML_BASE_URL:
                    print("RSS_AND_HTML_BASE_URL not configured in Config.py\nWon't do conversion to rss or html")
                else:
                    from Transformer import Transformer
                    urlbase = Config.RSS_AND_HTML_BASE_URL
                    if not urlbase.endswith("/"):
                        urlbase = urlbase + "/"
                    x = Transformer(copyToDir=Config.COPY_RSS_AND_HTML_HERE)
                    x.makeTransformations(baseURL=urlbase)
        else:
            print("error while appending new entry!")
    except:
        print("error:")
        import traceback
        print(sys.exc_info()[0])
        print(traceback.format_exc())
    print("Finished "+(successfully and "successfully" or "with errors"))
    
    
    # end