def train_iters(model, n_epochs, print_every=1000, learning_rate=0.01):
    start = time.time()
    print_loss_total = 0

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    input_lang, output_lang, pairs = data.prepareData('eng', 'deu', True)
    training_pairs = [
        variables_from_pairs(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_epochs)
    ]
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, n_epochs + 1):
        training_pair = training_pairs[epoch - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]
        # print(input_variable)   # debug: inspect the encoded pair
        # print(target_variable)

        loss = train(model=model,
                     optimizer=optimizer,
                     input_variable=input_variable,
                     target_variable=target_variable,
                     criterion=criterion)
        print_loss_total += loss

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),
                                         epoch, epoch / n_epochs * 100,
                                         print_loss_avg))
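# Note: `variables_from_pairs` is referenced above but not defined in this
# snippet. A minimal sketch of what such a helper might look like, following
# the usual seq2seq-tutorial pattern of turning a (source, target) sentence
# pair into index tensors (the `EOS_token` constant and the `word2index`
# attribute on the Lang objects are assumptions, not taken from this source):
def variables_from_pairs(input_lang, output_lang, pair):
    def sentence_to_tensor(lang, sentence):
        # Look up each word's index and terminate with the end-of-sentence token.
        indexes = [lang.word2index[word] for word in sentence.split(' ')]
        indexes.append(EOS_token)
        return torch.tensor(indexes, dtype=torch.long).view(-1, 1)

    return (sentence_to_tensor(input_lang, pair[0]),
            sentence_to_tensor(output_lang, pair[1]))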
def test_accuracy(enc, dec, fname):
    # pairs = []
    # with open('data/' + name, 'r') as f:
    #     for line in f:
    #         src, tar = line.strip('\n').split('\t')
    #         pairs.append((src, tar))
    input_lang, output_lang, pairs = data.prepareData(fname, reverse=False)
    # Batch with the vocabularies built at training time (main.input_lang /
    # main.output_lang) so test tokens map to the same indices.
    batch_pairs = main.to_batch(main.input_lang, main.output_lang, pairs, BATCH_SIZE)
    return accuracy(enc, dec, batch_pairs)
def predictor(self):
    weight = self.meta_index[
        (self.meta_index['store'] == self.store_code)
        & (self.meta_index['product'] == self.product_name)].iloc[0]['weight']
    # outlier = meta_index[(meta_index['store'] == store_code) &
    #                      (meta_index['product'] == product_name)].iloc[0]['outlier']
    weight = './weight/' + weight

    prepareData1 = prepareData(merged_df=self.merged_df,
                               product_name=self.product_name,
                               store_code=self.store_code,
                               train_date=self.train_date,
                               predict_date=self.predict_date,
                               sequence_x=self.sequence_x,
                               sequence_y=self.sequence_y)
    df, df_train, df_test, sale_qty, x_columns, x_1_columns = prepareData1.sep_data2()
    (x_scaler, x_1_scaler, y_scaler, column_num_x, column_num_x_1,
     x_columns, x_1_columns, sale_qty) = prepareData1.scaled_origin()
    x_test_scaled, x_test_1_scaled, y_test_scaled = prepareData1.scaled_data(
        df_train=df_test)

    model = create_model(column_num_x, column_num_x_1, self.sequence_x,
                         self.sequence_y)
    np.nan_to_num(x_test_1_scaled, copy=False)
    print(x_test_scaled.shape, x_test_1_scaled.shape)
    next_week_sales = prediction(x_test_scaled[-2:, :, :],
                                 x_test_1_scaled[-2:, :, :],
                                 y_scaler,
                                 weight=weight,
                                 model=model)
    # print(next_week_sales)
    return next_week_sales
def getResultOfUri(uri, feeling, activity, location):
    from sklearn import preprocessing

    model = None
    fle = preprocessing.LabelEncoder()
    ale = preprocessing.LabelEncoder()
    lle = preprocessing.LabelEncoder()
    gle = preprocessing.LabelEncoder()

    dfg = prepareData(uri)
    dfgCopy = dfg.copy()
    dfgCopy['feeling'] = fle.fit_transform(dfg['feeling'])
    dfgCopy['activity'] = ale.fit_transform(dfg['activity'])
    dfgCopy['location'] = lle.fit_transform(dfg['location'])
    del dfgCopy['genre']
    labels = gle.fit_transform(dfg['genre'])

    X = dfgCopy[dfgCopy.columns[:]]
    y = labels
    if uri not in modelResultCache:
        modelResultCache[uri] = getModel(X, y)
    model = modelResultCache[uri]

    # like, hate, restart, feeling, activity, location
    toPredict = [[1, 0, 1,
                  transformOrDefault(fle, feeling),
                  transformOrDefault(ale, activity),
                  transformOrDefault(lle, location)]]
    print(f"predicting {toPredict}")
    result = model.predict(toPredict)
    return gle.inverse_transform(result)[0]
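# Note: `transformOrDefault` is assumed above but not shown here. A minimal
# sketch of what it might do, assuming labels unseen during fit_transform
# should fall back to a default index (the `default=0` choice is an assumption):
def transformOrDefault(encoder, value, default=0):
    # Map a raw label to its encoded index; unseen labels get the default.
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    return default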
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


def evaluateAndShowAttention(input_sentence):
    output_words, attentions = evaluate(encoder1, attn_decoder1, input_sentence)
    print('input =', input_sentence)
    print('output =', ' '.join(output_words))
    showAttention(input_sentence, output_words, attentions)


if __name__ == '__main__':
    input_lang, output_lang, pairs = data.prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    teacher_forcing_ratio = 0.5
    hidden_size = 256
    encoder1 = model.EncoderRNN(input_lang.n_words, hidden_size,
                                args.device).to(args.device)
    attn_decoder1 = model.AttnDecoderRNN(hidden_size, output_lang.n_words,
                                         args.device, 0.1,
                                         args.MAX_LENGTH).to(args.device)

    trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

######################################################################
    encoder_hidden = encoder.initHidden(device)
    for ei in range(input2_length):
        _, encoder_hidden = encoder(input2_tensor[ei], encoder_hidden)
    encoded2_tensor = encoder_hidden[-1]

    classifier_output = classifier(encoded1_tensor, encoded2_tensor)
    return classifier_output


def evaluateAll(dataset, encoder, classifier, device):
    for pair in dataset:
        output = evaluate(encoder, classifier, pair[0], pair[1], device)
        topv, topi = output.data.topk(1)
        prediction = int(topi.view(1)[0])
        print(prediction)


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    lang = data.loadLang(sys.argv[1], include=[sys.argv[2]], cache=False)
    _, entries = data.prepareData(sys.argv[3], [], [], lang)

    encoder1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] + '/encoder.pt')
    classifier1 = torch.load('/home/rodrigo/ml/deepopt/test-4/' + sys.argv[2] + '/classifier.pt')

    evaluateAll(entries, encoder1, classifier1, device)
    return inputs[i:i+bsz], \
           masks[i:i+bsz], \
           targets[i:i+bsz]


def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


if __name__ == "__main__":
    # prepare data
    device = torch.device("cpu")
    inputs, masks, targets = prepareData()
    inputs, masks, targets = inputs.to(device), masks.to(device), targets.to(device)
    bsz = 1  # TODO: batch size and seq_len is the issue to be addressed
    # i = 5

    # setup model
    from model import RNN
    input_size = 1
    hidden_size = 3
    output_size = 2
    rnn = RNN(input_size, hidden_size, output_size).to(device)
        current_loss += loss
    return current_loss / n_batches


def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


if __name__ == "__main__":
    # prepare data
    np_data, np_labels, np_vdata, np_vlabels = prepareData()
    batch_size = args.batch_size  # TODO: batch size and seq_len is the issue to be addressed
    n_epoches = args.max_epochs
    batches = batchify(np_data, batch_size, np_labels)
    vbatches = batchify(np_vdata, batch_size, np_vlabels)
    device = torch.device("cuda")

    # setup model
    from model import RNN, NaiveRNN
    input_size = 2
    hidden_size = args.hidden_size
    output_size = 2
    rnn = RNN(input_size, hidden_size, output_size, batch_size).to(device)
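# Note: `batchify` is called above but not defined in this snippet. A minimal
# sketch, assuming it simply chunks the numpy arrays into (input, label)
# tensor pairs of `batch_size` rows and drops any trailing remainder:
import torch

def batchify(np_data, batch_size, np_labels):
    n_batches = len(np_data) // batch_size
    batches = []
    for b in range(n_batches):
        lo, hi = b * batch_size, (b + 1) * batch_size
        # Convert each consecutive chunk of rows to tensors.
        batches.append((torch.as_tensor(np_data[lo:hi], dtype=torch.float32),
                        torch.as_tensor(np_labels[lo:hi], dtype=torch.long)))
    return batches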
def main():
    global args, max_length
    args = parser.parse_args()

    if args.eval:
        if not os.path.exists(args.output_dir):
            print("Output directory does not exist")
            exit(0)
        try:
            model = EncoderDecoder().load(args.output_dir)
            print("Model loaded successfully")
        except Exception:
            print("The trained model could not be loaded...")
            exit()

        test_pairs = readFile(args.test_file)
        outputs = model.evaluatePairs(test_pairs, rand=False, char=args.char)
        writeToFile(outputs, os.path.join(args.output_dir, "output.pkl"))

        reference = []
        hypothesis = []
        for (hyp, ref) in outputs:
            if args.char or args.char_bleu:
                reference.append([list(ref)])
                hypothesis.append(list(hyp))
            else:
                reference.append([ref.split(" ")])
                hypothesis.append(hyp.split(" "))
        bleu_score = compute_bleu(reference, hypothesis)
        print("Bleu Score: " + str(bleu_score))

        print(model.evaluateAndShowAttention(
            "L'anglais n'est pas facile pour nous.", char=args.char))
        print(model.evaluateAndShowAttention(
            "J'ai dit que l'anglais est facile.", char=args.char))
        print(model.evaluateAndShowAttention(
            "Je n'ai pas dit que l'anglais est une langue facile.", char=args.char))
        print(model.evaluateAndShowAttention(
            "Je fais un blocage sur l'anglais.", char=args.char))
    else:
        input_lang, output_lang, pairs = prepareData(args.train_file)
        print(random.choice(pairs))

        if args.char:
            model = EncoderDecoder(args.hidden_size, input_lang.n_chars,
                                   output_lang.n_chars, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, False, 1)
        else:
            model = EncoderDecoder(args.hidden_size, input_lang.n_words,
                                   output_lang.n_words, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, args.multi,
                                   args.num_layers)

        model.trainIters(pairs, input_lang, output_lang, args.n_iters,
                         print_every=args.print_every,
                         plot_every=args.plot_every, char=args.char)
        model.save(args.output_dir)
        model.evaluatePairs(pairs, char=args.char)
            value=PAD) for sen in batch_tar
        ]
        # the transposing makes the data of the size: length * batch_size
        res.append((torch.stack(padded_src).t().contiguous().to(DEVICE),
                    torch.stack(padded_tar).t().contiguous().to(DEVICE)))
        batch_src = []
        batch_tar = []

    # res: list of batch pairs
    return res


# src_name, tar_name = TRAIN_FILE.split('-')
# input_lang, output_lang, pairs = data.prepareData('spa', 'en', True)
input_lang, output_lang, pairs = data.prepareData(TRAIN_FILE, reverse=False)
batch_pairs = to_batch(input_lang, output_lang, pairs, batch_size=BATCH_SIZE)

if args.model == 'stack':
    enc = stack.EncoderSRNN(input_size=input_lang.n_words,
                            hidden_size=args.hidden,
                            nstack=args.nstack,
                            stack_depth=args.stack_depth,
                            stack_size=args.stack_size,
                            stack_elem_size=args.stack_elem_size).to(DEVICE)
    dec = stack.DecoderSRNN(output_size=output_lang.n_words,
                            hidden_size=args.hidden,
                            nstack=args.nstack,
                            stack_depth=args.stack_depth,
                            stack_size=args.stack_size,
import net as rede
import data

# print(data.countTimeFold("/home/joseildo/SpeechDetection/musan/speech/librivox/"), "seconds")
data.loadFromFold('/home/joseildo/SpeechDetection/musan/speech/librivox/')
data.notSilence = "2"
data.loadFromFold("/home/joseildo/SpeechDetection/musan/noise/sound-bible")
data.prepareData(x, y, shuffle=True)
rede.model.save_weights(
    '/home/joseildo/SpeechDetection/pyHumanDetect/pesosPorraa.h5')
# with open(mapping_path, 'rb') as f:
#     mappings = cPickle.load(f)
# input_lang = mappings['input_lang']
# output_lang = mappings['output_lang']
# pairs = mappings['pairs']
# else:
#     input_lang, output_lang, pairs = prepareData(parameters['lang1'], parameters['lang2'], True)
#     with open(mapping_path, 'wb') as f:
#         mappings = {
#             input_lang,
#             output_lang,
#             pairs,
#         }
#         cPickle.dump(mappings, f)
input_lang, output_lang, pairs = prepareData(parameters['lang1'], parameters['lang2'], True)


def train(input_word_tensor, target_word_tensor, input_char_tensor, encoder,
          decoder, encoder_optimizer, decoder_optimizer, criterion, max_length):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_word_tensor.size(0)
    target_length = target_word_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
def training():
    context_x = Context(inputs_dict.n_words, hidden_size).to(device)
    classification_x = Classification().to(device)
    context_x, classification_x, plot_losses = trainIters(context_x,
                                                          classification_x,
                                                          device, inputs_dict,
                                                          target_dict, pairs,
                                                          n_iters,
                                                          print_every=50)
    return context_x, classification_x, plot_losses


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    name = 'data'
    max_length = 10
    inputs_dict, target_dict, pairs = prepareData(name, max_length)
    # print(target_dict.word2index)

    hidden_size = 256
    n_iters = 700

    context_x, classification_x, plot_losses = training()
    showPlot(plot_losses)
    evaluateRandomly(context_x, classification_x, device, inputs_dict,
                     target_dict, pairs, 5)
import numpy as np
import pandas as pd
import xgboost as xgb
import gc

import data

print('Loading data ...')
x_train, y_train, x_valid, y_valid, x_test = data.prepareData()
print(x_train.shape)
print(y_train.shape)
print(x_valid.shape)
print(y_valid.shape)
print(x_test.shape)

print('Building DMatrix...')
d_train = xgb.DMatrix(x_train, label=y_train)
d_valid = xgb.DMatrix(x_valid, label=y_valid)

del x_train, x_valid
gc.collect()

print('Training ...')
'''
params = {}
params['eta'] = 0.02
params['objective'] = 'reg:linear'
params['eval_metric'] = 'mae'
params['max_depth'] = 6
params['silent'] = 1
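# Note: the parameter block above is commented out and the script is truncated
# here. One plausible continuation, assuming the `params` dict is re-enabled
# and a validation watchlist with early stopping is wanted (the round counts
# below are illustrative, not taken from the source):
watchlist = [(d_train, 'train'), (d_valid, 'valid')]
clf = xgb.train(params, d_train, num_boost_round=10000, evals=watchlist,
                early_stopping_rounds=100, verbose_eval=10)

print('Predicting ...')
d_test = xgb.DMatrix(x_test)
p_test = clf.predict(d_test)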
def trainer(self):
    meta_index = pd.DataFrame(
        data=[[1, '백산수2.0L', 'promotion_flag_1_bac_2', 'sale_qty_1_bac_2', '1_bac2.hdf5'],
              [1, '백산수500ml', 'promotion_flag_1_bac_5', 'sale_qty_1_bac_5', '1_bac5.hdf5'],
              [1, '신라면멀티', 'promotion_flag_1_sin', 'sale_qty_1_sin', '1_sin.hdf5'],
              [1, '안성탕면멀티', 'promotion_flag_1_ans', 'sale_qty_1_ans', '1_ans.hdf5'],
              [1, '진라면멀티(순한맛)', 'promotion_flag_1_jin', 'sale_qty_1_jin', '1_jin.hdf5'],
              [6, '백산수2.0L', 'promotion_flag_6_bac_2', 'sale_qty_6_bac_2', '6_bac2.hdf5'],
              [6, '백산수500ml', 'promotion_flag_6_bac_5', 'sale_qty_6_bac_5', '6_bac5.hdf5'],
              [6, '신라면멀티', 'promotion_flag_6_sin', 'sale_qty_6_sin', '6_sin.hdf5'],
              [6, '안성탕면멀티', 'promotion_flag_6_ans', 'sale_qty_6_ans', '6_ans.hdf5'],
              [6, '진라면멀티(순한맛)', 'promotion_flag_6_jin', 'sale_qty_6_jin', '6_jin.hdf5']],
        columns=['store', 'product', 'promotion', 'sale', 'weight'])
    model_name = meta_index[(meta_index['store'] == self.store_code) & (
        meta_index['product'] == self.product_name)].iloc[0]['weight']

    prepareData1 = prepareData(merged_df=self.merged_df,
                               product_name=self.product_name,
                               store_code=self.store_code,
                               train_date=self.train_date,
                               predict_date=self.predict_date,
                               sequence_x=self.sequence_x,
                               sequence_y=self.sequence_y)
    df, df_train, df_test, sale_qty, x_columns, x_1_columns = prepareData1.sep_data2()
    (x_scaler, x_1_scaler, y_scaler, column_num_x, column_num_x_1,
     x_columns, x_1_columns, sale_qty) = prepareData1.scaled_origin()
    print(df_train)
    x_train_scaled, x_train_1_scaled, y_train_scaled = prepareData1.scaled_data(
        df_train=df_train)

    model = create_model(column_num_x, column_num_x_1, self.sequence_x,
                         self.sequence_y)
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=[keras.losses.Huber()],  # MeanSquaredError or Huber
        metrics=['mse'])

    filepath = './weight/' + model_name
    checkpoint_path = filepath
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    earlystop = EarlyStopping(monitor='loss',
                              min_delta=0,
                              patience=30,
                              verbose=0,
                              mode='auto',
                              baseline=None,
                              restore_best_weights=False)
    callbacks_list = [checkpoint, earlystop]

    history = model.fit({"long": x_train_scaled, "short": x_train_1_scaled},
                        {"prediction": y_train_scaled},
                        epochs=2000,
                        batch_size=32,
                        callbacks=callbacks_list,
                        shuffle=False)
    scores = model.evaluate(x=(x_train_scaled, x_train_1_scaled),
                            y=y_train_scaled,
                            verbose=0)
    # return history
    return scores
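# Note: `create_model` is not shown in this snippet, but the fit() call above
# implies a two-input functional model with inputs named "long" and "short"
# and an output named "prediction". A minimal sketch under those assumptions;
# the layer types, layer sizes, and the short-branch sequence length are
# guesses, not taken from the source:
from tensorflow import keras
from tensorflow.keras import layers

def create_model(column_num_x, column_num_x_1, sequence_x, sequence_y):
    # Long-horizon branch over the main feature sequence.
    long_in = keras.Input(shape=(sequence_x, column_num_x), name="long")
    long_feat = layers.LSTM(64)(long_in)
    # Short-horizon branch over the auxiliary feature sequence.
    short_in = keras.Input(shape=(sequence_y, column_num_x_1), name="short")
    short_feat = layers.LSTM(32)(short_in)
    # Merge both branches and predict the next `sequence_y` sales quantities.
    merged = layers.concatenate([long_feat, short_feat])
    out = layers.Dense(sequence_y, name="prediction")(merged)
    return keras.Model(inputs=[long_in, short_in], outputs=[out])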