# Option 1 for loading other modules: put the project's "Main" directory on
# the module search path, presumably so the project modules imported below
# can be resolved.
# NOTE(review): this is an absolute, machine-specific path; adjust it (or use
# the relative alternative sketched below) when running on another machine.
_main_dir = dirname("/Users/dominiquepaul/xCoding/classification_tool/Main/")
sys.path.append(_main_dir)

### an alternative method for handling file paths ###
# main_path = os.path.dirname(__file__) # this doesn't work when run in a REPL environment
# module_path = os.path.join(main_path,"../" )
# sys.path.append(dirname(module_path))

from preprocessing import join_npy_data
from transfer_learning import Transfer_net

# Load the packaged example images; the loader's result is unpacked into the
# train/test arrays plus a conversion object.
file_paths = [
    "./example_output_folder/apparel_image_package_train_val_split_0.npy",
]
loaded_data = join_npy_data(file_paths, training_data_only=False)
x_train, y_train, x_test, y_test, conversion = loaded_data

# Configuration values that define our own model architecture.
network_config = {
    "layers": 2,
    "neurons": 10,
    "dropout_rate": 0.6,
    "num_output_classes": 2,
}

# Instantiate the empty model, then build the network from the configuration.
transfer_model = Transfer_net()
transfer_model.create_network(**network_config)

# Convert the training data into transfer values.
x_train = transfer_model.load_transfer_data(x_train)
# Alternatively, the transfer values could also be cached:
# x_train = transfer_model.load_or_cache_transfer_data(x_train, file_path="./example_output_folder/transfernet_cached_files")

# train the model
transfer_model.train(x_train,
sys.path.append(dirname(module_path))

### an alternative method for handling file paths is by using the absolute path ###
# sys.path.append(dirname("/Users/dominiquepaul/xCoding/classification_tool/Main/modules/"))

from regressionclass import Logistic_regression, Lasso_regression

# Folder where the feature tables for the different label counts are saved.
FOLDER_PATH_SAVE = "../Data/wnet_hyperopt_datasets"

# Industry labels, read from the industry-dict selection file.
ind_labels = load_industry_labels(
    file_path="./industry_dicts/selection_AutomobileManufacturers.csv"
)

OBJECT = "car"
automotive_pckgs = ["../Data/np_files/car_image_package_train_test_split0.npy"]
loaded_data = join_npy_data(automotive_pckgs)
x_train, y_train, x_test, y_test, conversion = loaded_data

# Label counts (k_labels) for which a feature table is generated.
n_label_list = [3, 5, 8, 10, 15, 20, 25, 50]

# Build the train and test feature tables for every label count, then write
# both to FOLDER_PATH_SAVE as "train_<k>" and "test_<k>".
for label_amount in tqdm(n_label_list):
    shared_kwargs = {
        "object_name": OBJECT,
        "ind_labels": ind_labels,
        "k_labels": label_amount,
    }
    x_train_df = create_feature_df(imgs=x_train, **shared_kwargs)
    x_test_df = create_feature_df(imgs=x_test, **shared_kwargs)

    train_target = FOLDER_PATH_SAVE + "/train_{}".format(label_amount)
    test_target = FOLDER_PATH_SAVE + "/test_{}".format(label_amount)
    x_train_df.to_csv(train_target)
    x_test_df.to_csv(test_target)
from cnn import cnn_model
from preprocessing import join_npy_data

# Presumably the number of hyperparameter-search evaluations; its use is not
# visible in this part of the script.
MAX_EVALS = 20

### Loading the data. Two possible methods:

# (a) For running the script on a local machine:
# data = ["../Data/np_files/car_image_package_train_test_split0.npy"]
# x_train, y_train, x_test, y_test, conversion = join_npy_data(data)

# (b) For running the script on a cloud server machine (active variant):
data_url = [
    'gs://data-imr-unisg/np_array_files/car_image_package_train_val_split_0.npy',
]
dataset = join_npy_data(data_url, training_data_only=False)
x_train, y_train, x_test, y_test, conversion = dataset

# File to save first results. Opening in 'w' mode truncates any existing file,
# so after this block the file contains only the header row.
out_file = 'out_files/hyperparameter_opt/custom_nn_hyperopt.csv'
# newline='' is what the csv module requires for file objects handed to
# csv.writer: the writer emits its own "\r\n" row terminator, and without
# newline='' platforms that translate "\n" (e.g. Windows) would write
# "\r\r\n", which shows up as a blank line after every row.
with open(out_file, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # Write the headers to the file
    writer.writerow([
        'conv_layers', 'conv_filters', 'dense_layers', 'dense_neurons',
        'dropout_rate_dense', 'learning_rate', 'run_time', 'val_loss',
        'val_accuracy', 'val_f1', 'train_loss', 'train_accuracy', 'train_f1'
    ])


# the function to be optimised: it takes the parameters and returns the loss (metric to be minimised)
def objective(params):
                                 k_labels=20)
# Feature table for the test images using the 50 top labels (k_labels=50).
x_test_df_50 = create_feature_df(imgs=x_test,
                                 object_name=OBJECT_NAME,
                                 ind_labels=ind_labels,
                                 k_labels=50)

# Data frame seeded with the "names" column.
# NOTE(review): presumably the run_* helpers below add their predictions to
# it; they are not visible here, so confirm.
ALL_PREDICTIONS_DF = pd.DataFrame({"names": names})
# only method that doesn't require a training set
# NOTE(review): this call hard-codes "car" while every other run_* call below
# passes OBJECT_NAME - confirm the two are meant to be the same value.
run_wordnet_direct("car", "custom", "Unaugmented")

# run 1/4: own images not augmented
automotive_pckgs = [
    os.path.join(DATA_FOLDER_PATH,
                 "np_files/car_image_package_train_val_split_0.npy")
]
# Only the training arrays and the conversion object are kept; the third and
# fourth return values are discarded, so x_test keeps its earlier binding.
x_train, y_train, _, _, conversion = join_npy_data(automotive_pckgs,
                                                   training_data_only=False)

# NOTE(review): the run_* helpers receive no data arguments, so they
# presumably read x_train / y_train (and the test data) from module globals.
# If so, they must stay after the rebinding above - verify before reordering.
run_custom_network(OBJECT_NAME, "custom", "Unaugmented")
run_transfer_network(OBJECT_NAME, "custom", "Unaugmented")
run_wordnet_indirect_v3(OBJECT_NAME, "custom", "Unaugmented")
run_wordnet_indirect_v4(OBJECT_NAME, "custom", "Unaugmented")

# run 2/4: own images augmented
automotive_pckgs_augmented = [
    os.path.join(DATA_FOLDER_PATH,
                 "np_files/car_image_package_train_val_split_augmented_0.npy"),
    os.path.join(DATA_FOLDER_PATH,
                 "np_files/car_image_package_train_val_split_augmented_1.npy"),
    os.path.join(DATA_FOLDER_PATH,
                 "np_files/car_image_package_train_val_split_augmented_2.npy"),
    os.path.join(DATA_FOLDER_PATH,
# --- Example no. 5 ---
# 0
    # Object name passed to create_feature_df below, and the industry labels
    # read from the industry-dict selection file.
    OBJECT = "car"
    ind_labels = load_industry_labels(file_path="./industry_dicts/selection_AutomobileManufacturers.csv")

    # non_augmented


    #basic_feats = ["max_score", "product_ref_count", "product_ref_sum", "product_reference"]
    #wordnet_feats = ["product_count_wordnet", "maxscorevalue_wordnet", "product_sum_wordnet"]

    #####################
    #### approach 1 #####
    #####################
    # direct wordnet
    # NOTE(review): this package is read from "np_files4", whereas approach 2
    # below reads from "np_files" - confirm both folders are intended.
    automotive_pckgs = ["../Data/np_files4/car_image_package_train_val_split_0.npy"]
    x_train, y_train, x_test, y_test, conversion = join_npy_data(automotive_pckgs, training_data_only=False)

    # Only the first two test items are passed to identify_items here.
    words_sought = ["cars","truck"]
    predictions = identify_items(x_test[:2], words_sought, k_labels=5, use_synonyms=True)


    #####################
    #### approach 2 #####
    #####################
    # regression with basic features
    # ind_labels is reloaded from the same file as at the top of this section;
    # automotive_pckgs and the x/y arrays are rebound to a different package.
    ind_labels = load_industry_labels(file_path="./industry_dicts/selection_AutomobileManufacturers.csv")
    automotive_pckgs = ["../Data/np_files/car_image_package_train_test_split0.npy"]
    x_train, y_train, x_test, y_test, conversion = join_npy_data(automotive_pckgs)
    # NOTE(review): the variable is named x_train_df but is built from x_test -
    # confirm whether imgs=x_train was intended.
    x_train_df = create_feature_df(imgs=x_test, object_name=OBJECT, ind_labels=ind_labels, k_labels=10, basic_feats=True, wordnet_feats=False)

    # train regression
# --- Example no. 6 ---
# 0
from transfer_learning import Transfer_net
from preprocessing import join_npy_data

# EPOCHS is passed as the `epochs` argument of the training call in
# objective() below. MAX_EVALS is presumably the number of hyperparameter
# evaluations; its use is not visible in this part of the script.
EPOCHS = 10000
MAX_EVALS = 20

# File to save first results. Opening in 'w' mode truncates any existing file,
# so after this block the file contains only the header row.
out_file = 'out_files/hyperparameter_opt/transfer_learning_hyperopt_out.csv'
# newline='' is what the csv module requires for file objects handed to
# csv.writer: the writer emits its own "\r\n" row terminator, and without
# newline='' platforms that translate "\n" (e.g. Windows) would write
# "\r\r\n", which shows up as a blank line after every row.
with open(out_file, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # Write the headers to the file
    writer.writerow([
        'neurons', 'layers', 'dropout_rate', 'learning_rate', 'run_time',
        'val_loss', 'val_accuracy', 'train_loss', 'train_accuracy',
    ])

# Load the image package; only the training arrays and the conversion object
# are kept. x_data holds the untransformed inputs - objective() below turns
# them into transfer values via load_or_cache_transfer_data.
automotive_pckgs = ["../Data/np_files4/car_image_package_train_val_split_0.npy"]
x_data, y_train, _, _, conversion = join_npy_data(automotive_pckgs, training_data_only=False)

# hyperparameter optimization with hyperopt
def objective(params):
    t_net = Transfer_net()
    t_net.create_network(layers=params["layers"], neurons=params["neurons"], dropout_rate=params["dropout_rate"], num_output_classes=2)
    x_train = t_net.load_or_cache_transfer_data(x_data, file_path="../Data/transfernet_files/x_train_T7")
    start = timer()
    t_net.train(x_train, y_train, learning_rate=params["learning_rate"], epochs=EPOCHS, batch_size=256, verbose=True, tb_logs_dir="./out_files/log_files/transfer_net/")
    run_time = timer() - start

    val_loss = t_net.hist.history["val_loss"][-1]
    val_accuracy = t_net.hist.history["val_acc"][-1]
    train_loss = t_net.hist.history["loss"][-1]
    train_accuracy = t_net.hist.history["acc"][-1]