Example #1
def train_iters(model, n_epochs, print_every=1000, learning_rate=0.01):
    start = time.time()
    print_loss_total = 0

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    input_lang, output_lang, pairs = data.prepareData('eng', 'deu', True)
    training_pairs = [
        variables_from_pairs(input_lang, output_lang, random.choice(pairs))
        for i in range(n_epochs)
    ]
    criterion = nn.CrossEntropyLoss()

    for epoch in range(n_epochs):
        training_pair = training_pairs[epoch]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        print(input_variable)
        print(target_variable)

        loss = train(model=model,
                     optimizer=optimizer,
                     input_variable=input_variable,
                     target_variable=target_variable,
                     criterion=criterion)
        print_loss_total += loss

        if (epoch + 1) % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, (epoch + 1) / n_epochs), epoch + 1,
                   (epoch + 1) / n_epochs * 100, print_loss_avg))
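Note: this example assumes project helpers such as variables_from_pairs, train, and timeSince that are defined elsewhere. For reference, a minimal timeSince in the style of the PyTorch seq2seq tutorial (a sketch, not necessarily this project's exact version):

import math
import time

def timeSince(since, percent):
    # elapsed time so far plus an estimate of the time remaining,
    # given the fraction of iterations completed
    def as_minutes(t):
        m = math.floor(t / 60)
        return '%dm %ds' % (m, t - m * 60)
    s = time.time() - since
    es = s / percent  # estimated total time
    return '%s (- %s)' % (as_minutes(s), as_minutes(es - s))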
Example #2
def test_accuracy(enc, dec, fname):
    # pairs=[]
    # with open('data/'+name,'r') as f:
    #     for line in f:
    #         src,tar=line.strip('\n').split('\t')
    #         pairs.append((src,tar))
    input_lang, output_lang, pairs = data.prepareData(fname, reverse=False)

    batch_pairs = main.to_batch(main.input_lang, main.output_lang, pairs,
                                BATCH_SIZE)
    return accuracy(enc, dec, batch_pairs)
Example #3
    def predictor(self):

        weight = self.meta_index[(self.meta_index['store'] == self.store_code) & (
            self.meta_index['product'] == self.product_name)].iloc[0]['weight']

        # outlier = meta_index[(meta_index['store'] == store_code) & (
        #     meta_index['product'] == product_name)].iloc[0]['outlier']

        weight = './weight/' + weight

        prepareData1 = prepareData(merged_df=self.merged_df,
                                   product_name=self.product_name,
                                   store_code=self.store_code,
                                   train_date=self.train_date,
                                   predict_date=self.predict_date,
                                   sequence_x=self.sequence_x,
                                   sequence_y=self.sequence_y)

        df, df_train, df_test, sale_qty, x_columns, x_1_columns = prepareData1.sep_data2()

        (x_scaler, x_1_scaler, y_scaler, column_num_x, column_num_x_1,
         x_columns, x_1_columns, sale_qty) = prepareData1.scaled_origin()

        x_test_scaled, x_test_1_scaled, y_test_scaled = prepareData1.scaled_data(
            df_train=df_test)

        model = create_model(column_num_x, column_num_x_1,
                             self.sequence_x, self.sequence_y)

        np.nan_to_num(x_test_1_scaled, copy=False)

        print(x_test_scaled.shape, x_test_1_scaled.shape)

        next_week_sales = prediction(x_test_scaled[-2:, :, :], x_test_1_scaled[-2:, :, :],
                                     y_scaler, weight=weight, model=model)

        # print(next_week_sales)

        return next_week_sales
Example #4
def getResultOfUri(uri, feeling, activity, location):
    model = None

    from sklearn import preprocessing

    fle = preprocessing.LabelEncoder()
    ale = preprocessing.LabelEncoder()
    lle = preprocessing.LabelEncoder()
    gle = preprocessing.LabelEncoder()

    dfg = prepareData(uri)

    dfgCopy = dfg.copy()
    dfgCopy['feeling'] = fle.fit_transform(dfg['feeling'])
    dfgCopy['activity'] = ale.fit_transform(dfg['activity'])
    dfgCopy['location'] = lle.fit_transform(dfg['location'])

    del dfgCopy['genre']

    labels = gle.fit_transform(dfg['genre'])

    X = dfgCopy[dfgCopy.columns[:]]
    y = labels

    if (uri not in modelResultCache):
        modelResultCache[uri] = getModel(X, y)

    model = modelResultCache[uri]
    # like  hate  restart  feeling  activity  location

    toPredict = [[1,  0,  1,  transformOrDefault(fle, feeling), transformOrDefault(ale, activity), transformOrDefault(lle, location)]]

    print(f"predicting {toPredict}")
    result = model.predict(toPredict)

    return gle.inverse_transform(result)[0]
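transformOrDefault is not shown in this example; a minimal sketch, assuming it falls back to a default class index when the fitted LabelEncoder has not seen the value (the signature and default below are assumptions):

def transformOrDefault(encoder, value, default=0):
    # Hypothetical helper: encode the value if the LabelEncoder knows it,
    # otherwise return a default index so prediction can still proceed.
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    return default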
Example #5
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


def evaluateAndShowAttention(input_sentence):
    output_words, attentions = evaluate(encoder1, attn_decoder1,
                                        input_sentence)
    print('input =', input_sentence)
    print('output =', ' '.join(output_words))
    showAttention(input_sentence, output_words, attentions)


if __name__ == '__main__':
    input_lang, output_lang, pairs = data.prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    teacher_forcing_ratio = 0.5

    hidden_size = 256
    encoder1 = model.EncoderRNN(input_lang.n_words, hidden_size,
                                args.device).to(args.device)
    attn_decoder1 = model.AttnDecoderRNN(hidden_size, output_lang.n_words,
                                         args.device, 0.1,
                                         args.MAX_LENGTH).to(args.device)

    trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

    ######################################################################
Example #6
        encoder_hidden = encoder.initHidden(device)
        for ei in range(input2_length):
            _, encoder_hidden = encoder(input2_tensor[ei], encoder_hidden)
        encoded2_tensor = encoder_hidden[-1]

        classifier_output = classifier(encoded1_tensor, encoded2_tensor)

        return classifier_output


def evaluateAll(dataset, encoder, classifier, device):
    for pair in dataset:
        output = evaluate(encoder, classifier, pair[0], pair[1], device)
        topv, topi = output.data.topk(1)
        prediction = int(topi.view(1)[0])
        print(prediction)


if __name__ == '__main__':

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lang = data.loadLang(sys.argv[1], include=[sys.argv[2]], cache=False)

    _, entries = data.prepareData(sys.argv[3], [], [], lang)

    encoder1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] +
                          '/encoder.pt')
    classifier1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] +
                             '/classifier.pt')
    evaluateAll(entries, encoder1, classifier1, device)
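The torch.load calls above assume the checkpoints deserialize onto the current machine. If they were saved on a GPU host and loaded on a CPU-only one, passing map_location avoids a device error; a minimal variant of the same calls:

    encoder1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] +
                          '/encoder.pt', map_location=device)
    classifier1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] +
                             '/classifier.pt', map_location=device)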
Example #7
    return inputs[i:i+bsz],\
           masks[i:i+bsz],\
           targets[i:i+bsz]

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

if __name__ == "__main__":
    # prepare data
    device = torch.device("cpu")

    inputs, masks, targets = prepareData()
    inputs, masks, targets = inputs.to(device), masks.to(device), targets.to(device)

    bsz = 1  # TODO: batch size and seq_len is the issue to be addressed
    #i = 5

    # setup model
    from model import RNN
    input_size = 1
    hidden_size = 3
    output_size = 2
    
    rnn = RNN(input_size, hidden_size, output_size).to(device)
Example #8
            current_loss += loss

    return current_loss / n_batches


def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


if __name__ == "__main__":
    # prepare data
    np_data, np_labels, np_vdata, np_vlabels = prepareData()
    batch_size = args.batch_size  #TODO: batchsize and seq_len is the issue to be addressed
    n_epoches = args.max_epochs

    batches = batchify(np_data, batch_size, np_labels)
    vbatches = batchify(np_vdata, batch_size, np_vlabels)

    device = torch.device("cuda")

    # setup model
    from model import RNN, NaiveRNN
    input_size = 2
    hidden_size = args.hidden_size
    output_size = 2

    rnn = RNN(input_size, hidden_size, output_size, batch_size).to(device)
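batchify is not included in this snippet; a minimal sketch, assuming it splits the numpy arrays into full (input, label) tensor batches of batch_size and drops the remainder:

import torch

def batchify(data, bsz, labels):
    # Hypothetical helper matching the call above: keep only full batches.
    n_batches = len(data) // bsz
    return [(torch.as_tensor(data[i * bsz:(i + 1) * bsz], dtype=torch.float32),
             torch.as_tensor(labels[i * bsz:(i + 1) * bsz], dtype=torch.long))
            for i in range(n_batches)]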
Example #9
def main():

    global args, max_length
    args = parser.parse_args()

    if args.eval:

        if not os.path.exists(args.output_dir):
            print("Output directory does not exist")
            exit(1)
        try:
            model = EncoderDecoder().load(args.output_dir)
            print("Model loaded successfully")
        except Exception:
            print("The trained model could not be loaded...")
            exit(1)

        test_pairs = readFile(args.test_file)

        outputs = model.evaluatePairs(test_pairs, rand=False, char=args.char)
        writeToFile(outputs, os.path.join(args.output_dir, "output.pkl"))
        reference = []
        hypothesis = []

        for (hyp, ref) in outputs:
            if args.char or args.char_bleu:
                reference.append([list(ref)])
                hypothesis.append(list(hyp))
            else:
                reference.append([ref.split(" ")])
                hypothesis.append(hyp.split(" "))

        bleu_score = compute_bleu(reference, hypothesis)
        print("Bleu Score: " + str(bleu_score))

        print(
            model.evaluateAndShowAttention(
                "L'anglais n'est pas facile pour nous.", char=args.char))
        print(
            model.evaluateAndShowAttention(
                "J'ai dit que l'anglais est facile.", char=args.char))
        print(
            model.evaluateAndShowAttention(
                "Je n'ai pas dit que l'anglais est une langue facile.",
                char=args.char))
        print(
            model.evaluateAndShowAttention("Je fais un blocage sur l'anglais.",
                                           char=args.char))

    else:
        input_lang, output_lang, pairs = prepareData(args.train_file)

        print(random.choice(pairs))

        if args.char:
            model = EncoderDecoder(args.hidden_size, input_lang.n_chars,
                                   output_lang.n_chars, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, False, 1)
        else:
            model = EncoderDecoder(args.hidden_size, input_lang.n_words,
                                   output_lang.n_words, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, args.multi,
                                   args.num_layers)

        model.trainIters(pairs,
                         input_lang,
                         output_lang,
                         args.n_iters,
                         print_every=args.print_every,
                         plot_every=args.plot_every,
                         char=args.char)
        model.save(args.output_dir)
        model.evaluatePairs(pairs, char=args.char)
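compute_bleu, readFile, and writeToFile are project helpers not shown here. NLTK's corpus_bleu accepts exactly the nested-list format built in the eval branch (a list of reference token lists per hypothesis), so a hedged stand-in for compute_bleu could be:

from nltk.translate.bleu_score import corpus_bleu

def compute_bleu(references, hypotheses):
    # references: [[ref_tokens, ...], ...] aligned with hypotheses: [hyp_tokens, ...]
    return corpus_bleu(references, hypotheses)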
Example #10
File: main.py Project: marcwww/srnn
                      value=PAD) for sen in batch_tar
            ]

            # the transposing makes the data of the size: length * batch_size
            res.append((torch.stack(padded_src).t().contiguous().to(DEVICE),
                        torch.stack(padded_tar).t().contiguous().to(DEVICE)))
            batch_src = []
            batch_tar = []

    # res: list of batch pairs
    return res


# src_name, tar_name = TRAIN_FILE.split('-')
# input_lang, output_lang, pairs = data.prepareData('spa', 'en', True)
input_lang, output_lang, pairs = data.prepareData(TRAIN_FILE, reverse=False)
batch_pairs = to_batch(input_lang, output_lang, pairs, batch_size=BATCH_SIZE)

if args.model == 'stack':
    enc = stack.EncoderSRNN(input_size=input_lang.n_words,
                                hidden_size=args.hidden,
                                nstack=args.nstack,
                                stack_depth=args.stack_depth,
                                stack_size=args.stack_size,
                                stack_elem_size=args.stack_elem_size).\
                                to(DEVICE)
    dec = stack.DecoderSRNN(output_size=output_lang.n_words,
                                hidden_size=args.hidden,
                                nstack=args.nstack,
                                stack_depth=args.stack_depth,
                                stack_size=args.stack_size,
Example #11
import net as rede
import data

#print(data.countTimeFold("/home/joseildo/SpeechDetection/musan/speech/librivox/"), "seconds")

data.loadFromFold('/home/joseildo/SpeechDetection/musan/speech/librivox/')
data.notSilence = "2"
data.loadFromFold("/home/joseildo/SpeechDetection/musan/noise/sound-bible")

# x and y are presumably the feature/label arrays assembled by the loadFromFold calls above
data.prepareData(x, y, shuffle=True)

rede.model.save_weights(
    '/home/joseildo/SpeechDetection/pyHumanDetect/pesosPorraa.h5')
Example #12
#     with open(mapping_path, 'rb') as f:
#         mappings = cPickle.load(f)
#         input_lang = mappings['input_lang']
#         output_lang = mappings['output_lang']
#         pairs = mappings['pairs']
# else:
#     input_lang, output_lang, pairs = prepareData(parameters['lang1'], parameters['lang2'], True)
#     with open(mapping_path, 'wb') as f:
#         mappings = {
#             'input_lang': input_lang,
#             'output_lang': output_lang,
#             'pairs': pairs,
#         }
#         cPickle.dump(mappings, f)

input_lang, output_lang, pairs = prepareData(parameters['lang1'],
                                             parameters['lang2'], True)


def train(input_word_tensor, target_word_tensor, input_char_tensor, encoder,
          decoder, encoder_optimizer, decoder_optimizer, criterion,
          max_length):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_word_tensor.size(0)
    target_length = target_word_tensor.size(0)

    encoder_outputs = torch.zeros(max_length,
                                  encoder.hidden_size,
Example #13

def training():
    context_x = Context(inputs_dict.n_words, hidden_size).to(device)
    classification_x = Classification().to(device)

    context_x, classification_x, plot_losses = trainIters(context_x,
                                                          classification_x,
                                                          device,
                                                          inputs_dict,
                                                          target_dict,
                                                          pairs,
                                                          n_iters,
                                                          print_every=50)

    return context_x, classification_x, plot_losses


if __name__ == '__main__':

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    name = 'data'
    max_length = 10
    inputs_dict, target_dict, pairs = prepareData(name, max_length)
    #print(target_dict.word2index)
    hidden_size = 256
    n_iters = 700
    context_x, classification_x, plot_losses = training()
    showPlot(plot_losses)
    evaluateRandomly(context_x, classification_x, device, inputs_dict,
                     target_dict, pairs, 5)
Example #14
import numpy as np
import pandas as pd
import xgboost as xgb
import gc
import data

print('Loading data ...')

x_train, y_train, x_valid, y_valid, x_test = data.prepareData()

print(x_train.shape)
print(y_train.shape)
print(x_valid.shape)
print(y_valid.shape)
print(x_test.shape)

print('Building DMatrix...')

d_train = xgb.DMatrix(x_train, label=y_train)
d_valid = xgb.DMatrix(x_valid, label=y_valid)

del x_train, x_valid; gc.collect()

print('Training ...')
'''
params = {}
params['eta'] = 0.02
params['objective'] = 'reg:linear'
params['eval_metric'] = 'mae'
params['max_depth'] = 6
params['silent'] = 1
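The snippet cuts off inside a triple-quoted (effectively commented-out) parameter block. As a hedged sketch of how the DMatrix objects would typically feed xgb.train, reusing the parameter values listed above (the round count and early-stopping settings are assumptions, not from the source):

params = {'eta': 0.02, 'objective': 'reg:linear', 'eval_metric': 'mae',
          'max_depth': 6, 'silent': 1}
watchlist = [(d_train, 'train'), (d_valid, 'valid')]
clf = xgb.train(params, d_train, num_boost_round=10000, evals=watchlist,
                early_stopping_rounds=100, verbose_eval=10)
p_test = clf.predict(xgb.DMatrix(x_test))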
Example #15
    def trainer(self):

        meta_index = pd.DataFrame(
            data=[[
                1, '백산수2.0L', 'promotion_flag_1_bac_2', 'sale_qty_1_bac_2',
                '1_bac2.hdf5'
            ],
                  [
                      1, '백산수500ml', 'promotion_flag_1_bac_5',
                      'sale_qty_1_bac_5', '1_bac5.hdf5'
                  ],
                  [
                      1, '신라면멀티', 'promotion_flag_1_sin', 'sale_qty_1_sin',
                      '1_sin.hdf5'
                  ],
                  [
                      1, '안성탕면멀티', 'promotion_flag_1_ans', 'sale_qty_1_ans',
                      '1_ans.hdf5'
                  ],
                  [
                      1, '진라면멀티(순한맛)', 'promotion_flag_1_jin',
                      'sale_qty_1_jin', '1_jin.hdf5'
                  ],
                  [
                      6, '백산수2.0L', 'promotion_flag_6_bac_2',
                      'sale_qty_6_bac_2', '6_bac2.hdf5'
                  ],
                  [
                      6, '백산수500ml', 'promotion_flag_6_bac_5',
                      'sale_qty_6_bac_5', '6_bac5.hdf5'
                  ],
                  [
                      6, '신라면멀티', 'promotion_flag_6_sin', 'sale_qty_6_sin',
                      '6_sin.hdf5'
                  ],
                  [
                      6, '안성탕면멀티', 'promotion_flag_6_ans', 'sale_qty_6_ans',
                      '6_ans.hdf5'
                  ],
                  [
                      6, '진라면멀티(순한맛)', 'promotion_flag_6_jin',
                      'sale_qty_6_jin', '6_jin.hdf5'
                  ]],
            columns=['store', 'product', 'promotion', 'sale', 'weight'])

        model_name = meta_index[(meta_index['store'] == self.store_code) & (
            meta_index['product'] == self.product_name)].iloc[0]['weight']

        prepareData1 = prepareData(merged_df=self.merged_df,
                                   product_name=self.product_name,
                                   store_code=self.store_code,
                                   train_date=self.train_date,
                                   predict_date=self.predict_date,
                                   sequence_x=self.sequence_x,
                                   sequence_y=self.sequence_y)

        df, df_train, df_test, sale_qty, x_columns, x_1_columns = prepareData1.sep_data2(
        )

        (x_scaler, x_1_scaler, y_scaler, column_num_x, column_num_x_1,
         x_columns, x_1_columns, sale_qty) = prepareData1.scaled_origin()

        print(df_train)

        x_train_scaled, x_train_1_scaled, y_train_scaled = prepareData1.scaled_data(
            df_train=df_train)

        model = create_model(column_num_x, column_num_x_1, self.sequence_x,
                             self.sequence_y)

        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=[
                keras.losses.Huber(),  # MeanSquaredError,Huber
            ],
            metrics=['mse'])

        filepath = './weight/' + model_name

        checkpoint_path = filepath

        checkpoint = ModelCheckpoint(checkpoint_path,
                                     monitor='loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')

        earlystop = EarlyStopping(
            monitor='loss',
            min_delta=0,
            patience=30,
            verbose=0,
            mode='auto',
            baseline=None,
            restore_best_weights=False,
        )

        callbacks_list = [checkpoint, earlystop]

        history = model.fit({
            "long": x_train_scaled,
            "short": x_train_1_scaled
        }, {"prediction": y_train_scaled},
                            epochs=2000,
                            batch_size=32,
                            callbacks=callbacks_list,
                            shuffle=False)

        scores = model.evaluate(x=(x_train_scaled, x_train_1_scaled),
                                y=y_train_scaled,
                                verbose=0)

        # return history
        return scores