Beispiel #1
0
def do_predict(label, category, csvs_folder_path):
    """Predict ``label`` for the category's test CSV and write the result back.

    Loads the pickled classifier stored for ``(category, label)``, runs
    ``predict_proba`` on the ``title`` column of the test CSV, passes the
    class probabilities together with the ``itemid`` column to
    ``get_top_2_dict`` and hands the result to ``write_to_csv``.

    Args:
        label: Name of the column to predict; also used to locate the model
            file and as the title of the column added to the test CSV.
        category: Category name; used to locate the model file and the
            test CSV.
        csvs_folder_path: Prefix that is string-concatenated with
            ``category`` and ``const.TEST`` to build the test CSV path
            (presumably must end with a path separator -- confirm against
            the caller).
    """
    # NOTE: `in_dir` is not a parameter; it is resolved from module scope.
    model_path = Utilities.construct_filepath(in_dir, [category, label],
                                              ".model")
    # NOTE(security): unpickling executes arbitrary code -- only load model
    # files that come from a trusted source.
    # The context manager guarantees the file handle is closed even when
    # unpickling fails (the original left it open).
    with open(model_path, "rb") as model_file:
        text_clf_svm = pickle.load(model_file)

    # read in test csv data
    test_csv_path = csvs_folder_path + category + const.TEST
    csv_helper = CsvHelper()
    csv_helper.set_csv(test_csv_path)
    # create a new column titled `label` so that it can be filled in later
    csv_helper.add_column(label)
    test_df = csv_helper.get_csv()

    # Do prediction
    data_df = test_df["title"]
    id_df = test_df["itemid"]
    predicted_multiclass = text_clf_svm.predict_proba(data_df)
    pred_top2 = get_top_2_dict(text_clf_svm.classes_, predicted_multiclass,
                               id_df)

    # write back to csv: pred_top2 holds the predictions, test_df is the
    # original csv content
    write_to_csv(pred_top2, test_df, test_csv_path)
Beispiel #2
0
def split(in_dir, out_dir, csv):
    """Pass the image paths listed in a CSV file to ``copy_images``.

    Creates ``out_dir`` when it does not exist yet, loads ``csv`` through
    ``CsvHelper``, reads its ``IMAGE_PATH`` column and calls
    ``copy_images(image_paths, in_dir, out_dir)``.

    Args:
        in_dir: Source directory forwarded to ``copy_images``.
        out_dir: Destination directory; created if missing.
        csv: CSV file handed to ``CsvHelper.set_csv`` (note: the parameter
            name shadows the stdlib ``csv`` module inside this function).
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
        # Single-argument print() with %-formatting is valid on Python 2 and
        # Python 3 and emits exactly what the old print statement did.
        print("created folder : %s" % (out_dir,))
    csv_helper = CsvHelper()
    csv_helper.set_csv(csv)
    # Two spaces after the colon reproduce the old output (the literal's
    # trailing space plus the print statement's separator).
    print("[INFO] using csv file :  %s" % (csv,))
    image_paths_df = csv_helper.get_single_column(IMAGE_PATH)
    copy_images(image_paths_df, in_dir, out_dir)
Beispiel #3
0
"""

if __name__ == "__main__":
    # Script entry point: run do_training once per label column of the
    # training CSV.

    # The "-save" argument is parsed, but the hard-coded `save` value below
    # is what decides whether models are saved.
    args = Utilities.process_arg("-save")
    out_dir = "/content/drive/My Drive/models"
    csvs_folder_path = "/content/drive/My Drive/yy_fashion.csv"
    category = "fashion"
    save = "true"

    # Any of the accepted spellings switches model saving on.
    save_model = save in ("t", "T", "true")

    train_csv_path = "/content/drive/My Drive/yy_fashion.csv"

    # Load the training CSV into the helper.
    csv_helper = CsvHelper()
    csv_helper.set_csv(train_csv_path)
    print("Set csv done")

    # Mass training: one model is trained per attribute/label.
    # To tune an individual model, replace `labels` with a single-element
    # list instead, e.g. labels = ["Colour_group"].
    csv_helper.set_all_headers_as_label()
    # Per the original note: every column name besides image name, itemid
    # and title.
    labels = csv_helper.get_label_headers()
    print("Predicting These classes : ", labels)
    for label in labels:
        do_training(label, csv_helper, save_model)