def train():
    params = {
        'GLOVE_DIR': '/data/' + str(sys.argv[1]) + '/keras-captioning/files/GLOVE',
        'EMBEDDING_DIM': 128,
        'MAX_SEQUENCE_LENGTH': 20,
        'VOCAB_SIZE': 7706,
        'RECUR_OUTPUT_DIM': 512,
        'IMAGE_ENCODING_SIZE': 4096,
        'PATH_TRAIN': '/data/' + str(sys.argv[1]) + '/keras-captioning/files/Flickr8k_text/flickr_8k_train_dataset.txt',
        'PICKLE_FILE': '/data/' + str(sys.argv[1]) + '/keras-captioning/files/encoded_images.p',
        'SPE': 128,
        'EPOCHS': 100,
        'BATCH_SIZE': 128,
        'SAVE_PATH': '/data/' + str(sys.argv[1]) + '/keras-captioning/files/models/'
    }

    # Get the model and the data generator from main_model
    main = main_model(params)
    model = main.model
    generator = main.gen

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    # Create the checkpoint directory for this run (named after sys.argv[2])
    newpath = params['SAVE_PATH'] + sys.argv[2]
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Save weights after every epoch; with save_best_only=False the mode/monitor
    # settings are not used
    filepath = params['SAVE_PATH'] + sys.argv[2] + "/weights-improvement-{epoch:02d}.hdf5"
    checkpoint = ModelCheckpoint(filepath, verbose=0, save_best_only=False, mode='max')
    callbacks_list = [checkpoint]

    model.fit_generator(generator, verbose=1, epochs=params['EPOCHS'],
                        steps_per_epoch=params['SPE'], callbacks=callbacks_list)

    name = 'abc.hd5'
    model.save(params['SAVE_PATH'] + name)
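# Minimal launch sketch for train() above (assumption: the function lives in a
# script invoked as `python train.py <data-volume> <run-name>`; the script name is
# hypothetical). sys.argv[1] selects the /data/<volume>/... paths and sys.argv[2]
# names the checkpoint sub-directory under SAVE_PATH.
if __name__ == '__main__':
    train()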
def main():
    # 3. Model init
    model = main_model(len(value_vocab), EMBEDDING_value, len(type_vocab), EMBEDDING_type,
                       HIDDEN_SIZE, BATCH_SIZE, CONTEXT_WINDOW).to(device)
    loss_function = nn.NLLLoss()
    learning_rate = 0.001
    # decay = 0.6
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Note: clip_grad_norm_ is called only once here; clipping is normally applied
    # after each backward pass inside train()
    clip = 5
    nn.utils.clip_grad_norm_(model.parameters(), clip)

    losses_train = []
    losses_eval = []
    M_A_P = []
    M_A_P_test = []

    starting_training = time.time()
    print("starting training ", starting_training - time_start)

    # Training loop
    num_epochs = 100
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        train_loss = train(model, optimizer, data_loader_train, loss_function)
        val_loss, m_A_p = eval(model, data_loader_eval, loss_function)
        _, m_A_p_test = eval(model, data_loader_test, loss_function)
        now = time.time()
        losses_train.append(train_loss / len_train)
        print("[Epoch:%d] train_loss:%f val_loss:%f test_MAP:%f | time spent: %f"
              % (epoch + 1, train_loss / len_train, val_loss / len_eval, m_A_p_test, (now - time_start) / 60))
        losses_eval.append(val_loss / len_eval)
        M_A_P.append(m_A_p)
        M_A_P_test.append(m_A_p_test)
        if (epoch + 1) % 5 == 0:
            print("train loss", losses_train)
            print("eval loss", losses_eval)
            print("eval MAP", M_A_P)
            print("test MAP", M_A_P_test)
            # torch.save(model.state_dict(), r'params_lstm_attn_50k.pkl')
            # model.load_state_dict(torch.load(r"./para/" + str(epoch + 1) + 'params_lstm_attn_50k.pkl'))

    import pandas as pd
    dataframe = pd.DataFrame({'train_loss': losses_train, 'eval_loss': losses_eval, 'eval MAP': M_A_P})
    dataframe.to_csv("test.csv", index=False, sep=',')
def estimate_lift(model_data, item_id, talk=True):
    """
    Estimates the lift of the given item.
    """
    y_normalization = 10000  # Used to fix units of the y variables
    pd.options.mode.chained_assignment = None  # Stops printing a warning that is not relevant

    # Get the category
    category = 0
    for i in range(1, 7):
        if model_data.loc[((model_data['item_id'] == item_id) & (model_data['is_cat_' + str(i)] == 1))].empty:
            continue
        else:
            category = i
            break

    # Get the rows for this category (all items)
    X, y = feature_select.feature_select(
        prep_data.get_category(model_data, category), category)

    # Get the promo period range
    start_week = X.loc[((X['on_promo'] == 1) & (X['item_id'] == item_id)), 'week'].min()
    end_week = X.loc[((X['on_promo'] == 1) & (X['item_id'] == item_id)), 'week'].max()

    # Get the total normalized sales during the promotion
    promotion_sales = model_data.loc[((model_data['item_id'] == item_id) &
                                      (model_data['week'] >= start_week) &
                                      (model_data['week'] <= end_week)), 'normalized_sales'].sum()

    # Estimate the sales during the same period if there was no promotion
    X_item = X.loc[((X['item_id'] == item_id) & (X['week'] >= start_week) & (X['week'] <= end_week))]
    X_item['on_promo'] = 0
    y_no_promo = model.main_model(X, y, X_item) / y_normalization

    if talk:
        print("Item", item_id)
        print("Promo period:", end_week - start_week, "weeks")
        print("Available data points were:", X_item.shape[0])
        print("Estimated lift per week: ",
              round(100 * (promotion_sales - y_no_promo.sum()) / (end_week - start_week), 2),
              "%\n", sep='')
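# Usage sketch (assumptions: model_data is the prepared dataframe containing the
# 'item_id', 'week', 'is_cat_1'..'is_cat_6', 'on_promo' and 'normalized_sales'
# columns referenced above; 123 is a hypothetical item id):
#     estimate_lift(model_data, item_id=123, talk=True)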
def get_result(self):
    self.check()
    data = ''
    home_address = Address(self.address)
    if home_address.is_zip_good():
        self.home_zip = home_address.zip_code
        if home_address.is_king_county():
            # self.val = model_random_forest(self.home_zip, int(self.living), int(self.beds),
            #                                int(self.baths), int(self.lot), int(self.year))
            self.val = main_model(self.home_zip, int(self.living), int(self.beds),
                                  int(self.baths), int(self.lot), int(self.year))
            self.has_model_data = True
    if home_address.is_address_good():
        data = zillow_api(self.address)
        if not isinstance(data, str):
            self.zillow = data['principal'].get_dict()
            self.val = self.zillow['zestimate']['amount']
            self.has_zillow = True
            if not isinstance(data['comps'], str):
                for el in data['comps']:
                    self.comps.append(el.get_dict())
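# Usage sketch (assumptions: get_result is a method of a home-valuation request
# object; the class name HomeEstimate and its constructor arguments below are
# hypothetical, only the attributes read above come from the snippet):
#     estimate = HomeEstimate(address='400 Broad St, Seattle, WA', living=1800,
#                             beds=3, baths=2, lot=5000, year=1995)
#     estimate.get_result()
#     print(estimate.val, estimate.has_model_data, estimate.has_zillow)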
# (fragment: the opening of the model_train input list precedes this excerpt)
    X_train_eeg.values[:, :, None],  # original
    X_train.iloc[:, 0:10],           # auxiliary
]
model_val = [
    X_val_eeg.iloc[:, np.arange(0, 1250, 5)].values[:, :, None],  # small
    X_val_eeg.iloc[:, np.arange(0, 1250, 2)].values[:, :, None],  # medium
    X_val_eeg.values[:, :, None],    # original
    X_val.iloc[:, 0:10],             # auxiliary
]

# Run model: it is trained on the list of inputs above and has two outputs, both
# supervised with the same labels (hence [y_train, y_train])
batch_size = 256
epochs = 15

model = main_model()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(model_train, [y_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(model_val, [y_val, y_val]))

# model.fit(model_train, y_train,
#           batch_size=batch_size,
#           epochs=epochs,
#           validation_data=(model_val, y_val))
def plot_predicted(X_observed, y_observed, item_id, category):
    """
    Plots the predicted values vs observed values.

    Inputs:
        X_observed: pandas dataframe, all rows in a given category
        y_observed: pandas dataframe, the observations of X_observed
        item_id: int, the item ID
        category: int, the category of the item

    -> Create an overlay plot with the predicted and observed values for the model
       for each of the promotion products, highlighting the promo periods with a
       different color.
    """
    y_normalization = 10000  # Used to fix units of the y variables

    # Create the values to predict: X_predict
    X_predict = X_observed.loc[X_observed['item_id'] == item_id]
    reference_row = X_predict.loc[X_predict['week'] == min(X_predict['week'])]
    for week in range(0, 206):
        week_row = X_predict.loc[X_predict['week'] == week]
        if week_row.empty:
            reference_row['week'] = week
            X_predict = X_predict.append(reference_row)
        else:
            reference_row = week_row

    plt.figure(item_id)

    X_observed_regular = X_observed.loc[((X_observed['on_promo'] == 0) & (X_observed['item_id'] == item_id))]
    y_observed_regular = y_observed[((X_observed['on_promo'] == 0) & (X_observed['item_id'] == item_id))]
    plt.scatter(X_observed_regular['week'], y_observed_regular / y_normalization,
                c='royalblue', zorder=8, label='Regular Sales')

    X_observed_promo = X_observed.loc[((X_observed['on_promo'] == 1) & (X_observed['item_id'] == item_id))]
    y_observed_promo = y_observed[((X_observed['on_promo'] == 1) & (X_observed['item_id'] == item_id))]
    plt.scatter(X_observed_promo['week'], y_observed_promo / y_normalization,
                c='firebrick', zorder=9, label='Promotional Sales', s=100)

    # The predicted data maintain the promo dates
    plt.scatter(X_predict['week'],
                model.main_model(X_observed, y_observed, X_predict) / y_normalization,
                c='g', zorder=10, label='Predicted Sales', marker='d')

    plt.xlabel('Week Number')
    plt.ylabel('Normalized Sales')
    plt.title('Sales of Item ' + str(item_id) + ' in Category ' + str(category))
    plt.legend()
    plt.show()
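# Usage sketch (assumptions: X_cat and y_cat come from the same
# feature_select/prep_data pipeline used in estimate_lift above; 123 and 4 are a
# hypothetical item id and category):
#     X_cat, y_cat = feature_select.feature_select(prep_data.get_category(model_data, 4), 4)
#     plot_predicted(X_cat, y_cat, item_id=123, category=4)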