예제 #1
0
def train():
    """Train the captioning model, checkpointing the weights after every epoch.

    Reads two command-line arguments:

    * ``sys.argv[1]`` -- directory name under ``/data/`` that contains the
      ``keras-captioning/files`` tree (GLOVE vectors, Flickr8k text, pickled
      image encodings, saved models).
    * ``sys.argv[2]`` -- name of the run directory created under
      ``SAVE_PATH``; the per-epoch weight files are written there.

    After training, the full model is also saved as ``abc.hd5`` directly
    under ``SAVE_PATH`` (not inside the run directory).
    """
    # Every path below hangs off the same root, so build it once.
    files_root = '/data/' + str(sys.argv[1]) + '/keras-captioning/files'

    params = {
        'GLOVE_DIR': files_root + '/GLOVE',
        'EMBEDDING_DIM': 128,
        'MAX_SEQUENCE_LENGTH': 20,
        'VOCAB_SIZE': 7706,
        'RECUR_OUTPUT_DIM': 512,
        'IMAGE_ENCODING_SIZE': 4096,
        'PATH_TRAIN': files_root + '/Flickr8k_text/flickr_8k_train_dataset.txt',
        'PICKLE_FILE': files_root + '/encoded_images.p',
        'SPE': 128,  # passed to fit_generator as steps_per_epoch
        'EPOCHS': 100,
        'BATCH_SIZE': 128,
        'SAVE_PATH': files_root + '/models/',
    }

    # main_model bundles the network and its training-data generator.
    main = main_model(params)
    model = main.model
    generator = main.gen
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # exist_ok=True avoids the check-then-create race of
    # "if not os.path.exists(...): os.makedirs(...)".
    run_dir = params['SAVE_PATH'] + sys.argv[2]
    os.makedirs(run_dir, exist_ok=True)

    # save_best_only=False: a weight file is written for every epoch.
    filepath = run_dir + "/weights-improvement-{epoch:02d}.hdf5"
    checkpoint = ModelCheckpoint(filepath, verbose=0, save_best_only=False, mode='max')
    callbacks_list = [checkpoint]

    # NOTE(review): fit_generator is deprecated in recent Keras releases in
    # favour of fit(); kept because the targeted Keras version is not visible
    # here -- confirm before upgrading.
    model.fit_generator(generator,
                        verbose=1,
                        epochs=params['EPOCHS'],
                        steps_per_epoch=params['SPE'],
                        callbacks=callbacks_list)

    model.save(params['SAVE_PATH'] + 'abc.hd5')
예제 #2
0
def main():
    """Train the model for a fixed number of epochs and dump the metric history.

    Relies on names defined outside this function: the vocabularies and
    hyper-parameters passed to ``main_model``, ``device``, the three data
    loaders, ``len_train`` / ``len_eval``, ``time_start`` and the project's
    ``train`` / ``eval`` functions (``eval`` here is the project function
    returning ``(loss, MAP)``, not the builtin).

    Each epoch runs one training pass, evaluates on the validation and test
    loaders, prints a summary line, and every fifth epoch prints the full
    history. At the end the train/eval losses and eval MAP are written to
    ``test.csv``.
    """
    # 3. Model init
    model = main_model(
        len(value_vocab), EMBEDDING_value,
        len(type_vocab), EMBEDDING_type,
        HIDDEN_SIZE, BATCH_SIZE, CONTEXT_WINDOW,
    ).to(device)

    loss_function = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # NOTE(review): this clipping call runs once, before the training loop
    # and before any backward pass made in this function, so it cannot clip
    # the gradients of the training steps unless train() clips on its own --
    # confirm.
    nn.utils.clip_grad_norm_(model.parameters(), 5)

    # Per-epoch history.
    train_history = []
    eval_history = []
    eval_map_history = []
    test_map_history = []

    training_started = time.time()
    print("starting training ", training_started - time_start)

    # Training
    num_epochs = 100
    for epoch_idx in range(num_epochs):
        epoch_no = epoch_idx + 1
        print(f'Epoch {epoch_no}/{num_epochs}')
        print('-' * 10)

        train_loss = train(model, optimizer, data_loader_train, loss_function)
        val_loss, eval_map = eval(model, data_loader_eval, loss_function)
        _, test_map = eval(model, data_loader_test, loss_function)
        minutes_elapsed = (time.time() - time_start) / 60

        # Losses are divided by the externally defined dataset sizes.
        mean_train_loss = train_loss / len_train
        mean_val_loss = val_loss / len_eval

        print("[Epoch:%d] train_loss:%f val_loss:%f test_MAP:%f | time spend: %f"
              % (epoch_no, mean_train_loss, mean_val_loss, test_map, minutes_elapsed))

        train_history.append(mean_train_loss)
        eval_history.append(mean_val_loss)
        eval_map_history.append(eval_map)
        test_map_history.append(test_map)

        # Every fifth epoch, print the whole history so far.
        if epoch_no % 5 == 0:
            print("train loss", train_history)
            print("eval loss", eval_history)
            print("eval MAP", eval_map_history)
            print("test MAP", test_map_history)

    import pandas as pd

    # The test MAP history is printed above but not part of the CSV.
    history = pd.DataFrame({
        'train_loss': train_history,
        'eval_loss': eval_history,
        "eval MAP": eval_map_history,
    })
    history.to_csv("test.csv", index=False, sep=',')
def estimate_lift(model_data, item_id, talk=True):
    """Estimate the promotional lift of one item and optionally print it.

    The item's category is looked up from the ``is_cat_1`` .. ``is_cat_6``
    indicator columns, the category's rows are turned into features, and the
    model is asked to predict the item's sales over its promotion weeks with
    ``on_promo`` forced to 0. The difference between the observed promotion
    sales and that prediction, divided by the number of promotion weeks, is
    reported as the lift.

    Args:
        model_data: DataFrame with at least the columns ``item_id``,
            ``week``, ``normalized_sales`` and ``is_cat_1`` .. ``is_cat_6``.
        item_id: identifier of the item to analyse.
        talk: when True, print the result; when False the function is silent.

    Returns:
        None. The estimate is only printed.
    """
    y_normalization = 10000  # Used to fix units of the y variables
    # Kept from the original so the process-wide pandas setting is unchanged.
    pd.options.mode.chained_assignment = None

    # Get the category: the first indicator column set for this item.
    # Stays 0 when no indicator matches.
    is_item = model_data['item_id'] == item_id
    category = 0
    for i in range(1, 7):
        if (is_item & (model_data['is_cat_' + str(i)] == 1)).any():
            category = i
            break

    # Get the rows for this category (all items)
    X, y = feature_select.feature_select(
        prep_data.get_category(model_data, category), category)

    # Get the promo period range
    item_rows = X['item_id'] == item_id
    promo_weeks = X.loc[item_rows & (X['on_promo'] == 1), 'week']
    if promo_weeks.empty:
        # Without promo rows the period bounds would be NaN and the model
        # would be handed an empty frame; there is nothing to estimate.
        if talk:
            print("Item", item_id)
            print("No promotion weeks found; lift cannot be estimated.\n")
        return

    start_week = promo_weeks.min()
    end_week = promo_weeks.max()
    # Both bounds are selected inclusively below, so the period spans
    # end - start + 1 weeks. This also avoids dividing by zero for a
    # one-week promotion.
    promo_length = end_week - start_week + 1

    # Get the total normalized sales during the promotion
    promotion_sales = model_data.loc[is_item &
                                     (model_data['week'] >= start_week) &
                                     (model_data['week'] <= end_week),
                                     'normalized_sales'].sum()

    # Estimate the sales during the same period if there was no promotion.
    # Work on an explicit copy so the assignment never targets a view of X.
    X_item = X.loc[item_rows & (X['week'] >= start_week) &
                   (X['week'] <= end_week)].copy()
    X_item['on_promo'] = 0
    # presumably dividing by y_normalization puts the prediction in the
    # units of 'normalized_sales' -- TODO confirm
    y_no_promo = model.main_model(X, y, X_item) / y_normalization

    if talk:
        print("Item", item_id)
        print("Promo period:", promo_length, "weeks")
        print("Available data points were:", X_item.shape[0])
        print("Estimated lift per week: ",
              round(
                  100 * (promotion_sales - y_no_promo.sum()) / promo_length,
                  2),
              "%\n",
              sep='')
예제 #4
0
    def get_result(self):
        """Fill in the valuation fields for ``self.address``.

        Nothing is set unless the address has a good zip code. For King
        County addresses ``main_model`` supplies ``self.val``. When the full
        address is good, Zillow is queried as well; a non-string Zillow
        result overrides ``self.val`` with the zestimate amount and appends
        the comparables to ``self.comps``.
        """
        self.check()
        home_address = Address(self.address)
        if not home_address.is_zip_good():
            return

        self.home_zip = home_address.zip_code

        if home_address.is_king_county():
            # An earlier revision called model_random_forest with the same
            # arguments instead of main_model.
            self.val = main_model(
                self.home_zip,
                int(self.living),
                int(self.beds),
                int(self.baths),
                int(self.lot),
                int(self.year),
            )
            self.has_model_data = True

        # A str result means "no usable Zillow data"; '' is the placeholder
        # when the API is not queried at all.
        zillow_data = zillow_api(self.address) if home_address.is_address_good() else ''
        if isinstance(zillow_data, str):
            return

        self.zillow = zillow_data['principal'].get_dict()
        # The zestimate takes precedence over the model's value.
        self.val = self.zillow['zestimate']['amount']
        self.has_zillow = True

        comparables = zillow_data['comps']
        if isinstance(comparables, str):
            return
        for comparable in comparables:
            self.comps.append(comparable.get_dict())
예제 #5
0
    X_train_eeg.values[:, :, None],  # original
    X_train.iloc[:, 0:10],  # auxiliary
]

# Validation inputs: two column-subsampled views of the EEG frame (every 5th
# and every 2nd of the first 1250 columns), the full frame, and the first ten
# columns of X_val. Each EEG array gets a trailing axis of length 1.
model_val = [
    *(
        X_val_eeg.iloc[:, np.arange(0, 1250, stride)].values[:, :, np.newaxis]
        for stride in (5, 2)  # small, medium
    ),
    X_val_eeg.values[:, :, np.newaxis],  # original
    X_val.iloc[:, :10],  # auxiliary
]

# Run model
batch_size = 256
epochs = 15

model = main_model()

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The same labels are passed twice, once per entry of the target list.
model.fit(
    model_train,
    [y_train, y_train],
    validation_data=(model_val, [y_val, y_val]),
    epochs=epochs,
    batch_size=batch_size,
)

# Single-target variant, kept for reference:
# model.fit(model_train, y_train,
#           batch_size=batch_size,
#           epochs=epochs,
#           validation_data=(model_val, y_val))
예제 #6
0
def plot_predicted(X_observed, y_observed, item_id, category):
    """Plot predicted against observed sales for one item.

    Regular and promotional observations are drawn in different colours and
    the model's predictions are overlaid for every week in the plotted range.
    Weeks with no observation for the item are predicted from a copy of the
    most recent earlier observed row (or of the item's earliest row when
    nothing earlier exists) with its ``week`` value replaced.

    Args:
        X_observed: pandas DataFrame, all rows in a given category; must
            contain the columns ``item_id``, ``week`` and ``on_promo``.
        y_observed: observations aligned with ``X_observed``.
        item_id: int, the item ID.
        category: int, the category of the item (used in the title only).

    Raises:
        ValueError: if ``X_observed`` has no rows for ``item_id``.
    """
    # Local import: pd.concat replaces DataFrame.append, which was removed
    # in pandas 2.0.
    import pandas as pd

    y_normalization = 10000  # Used to fix units of the y variables
    num_weeks = 206  # weeks 0 .. 205 are predicted

    is_item = X_observed['item_id'] == item_id

    # Create the values to predict: X_predict
    X_predict = X_observed.loc[is_item]
    if X_predict.empty:
        raise ValueError('no observations for item ' + str(item_id))

    item_weeks = X_predict['week']
    reference_row = X_predict.loc[item_weeks == item_weeks.min()]

    # Collect one filler per missing week and concatenate once at the end.
    # The fillers only ever carry the week they stand in for, so they cannot
    # affect the lookup of any later week.
    fillers = []
    for week in range(num_weeks):
        week_row = X_predict.loc[item_weeks == week]
        if week_row.empty:
            # assign() returns a modified copy, so no slice is written to.
            fillers.append(reference_row.assign(week=week))
        else:
            reference_row = week_row
    if fillers:
        X_predict = pd.concat([X_predict] + fillers)

    plt.figure(item_id)

    regular = (X_observed['on_promo'] == 0) & is_item
    plt.scatter(X_observed.loc[regular, 'week'],
                y_observed[regular] / y_normalization,
                c='royalblue',
                zorder=8,
                label='Regular Sales')

    promo = (X_observed['on_promo'] == 1) & is_item
    plt.scatter(X_observed.loc[promo, 'week'],
                y_observed[promo] / y_normalization,
                c='firebrick',
                zorder=9,
                label='Promotional Sales',
                s=100)

    # The predicted data maintain the promo dates
    predicted = model.main_model(X_observed, y_observed, X_predict)
    plt.scatter(X_predict['week'],
                predicted / y_normalization,
                c='g',
                zorder=10,
                label='Predicted Sales',
                marker='d')

    plt.xlabel('Week Number')
    plt.ylabel('Normalized Sales')
    plt.title('Sales of Item ' + str(item_id) + ' in Category ' +
              str(category))

    plt.legend()
    plt.show()