Example #1
def preprocess():
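    # run data_utils.preprocess_data on the train/validation files and vocab/embedding paths taken from FLAGS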
    data_utils.preprocess_data(
        data_paths=[FLAGS.train_data_file, FLAGS.valid_data_file],
        vocab_path=FLAGS.vocabulary_file,
        embedding_path=FLAGS.save_embedding_file,
        train_data_path=FLAGS.train_data_file,
        valid_data_path=FLAGS.valid_data_file
    )
Example #2
from collections import defaultdict
import math
import numpy as np

from data_utils import read_data, preprocess_data, clean_text, read_weights

if __name__ == '__main__':
    data, _ = read_data(use_loaded=True)
    X, y, emb, tokenizer, label_encoder = preprocess_data(data=data,
                                                          use_loaded=True)
    with open("data/adjectives_people.txt", "r", encoding="utf-8") as fin:
        identity_columns = [line.strip() for line in fin.readlines()]

    cleaned_text = data["text"].apply(clean_text).values

    debias_weights = np.ones(len(y))
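    # debias_weights start uniform; sum_pos / sum_all below total the weights of the label-0 examples and of all examples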
    num_pos, num_all = defaultdict(int), defaultdict(int)
    sum_pos = sum([debias_weights[i] for i in range(len(y)) if y[i] == 0])
    sum_all = sum(debias_weights)
    for idty in identity_columns:
        for i in range(len(cleaned_text)):
            ok = False
            sen = cleaned_text[i]
            if idty in ["american", "african"]:
                sen_split = sen.split()
                for j in range(len(sen_split)):
                    if sen_split[j] == idty:
                        if j == 0 or " ".join([sen_split[j - 1], sen_split[j]
                                               ]) != "american african":
                            if j == len(sen_split) - 1 or " ".join([
                                    sen_split[j], sen_split[j + 1]
Example #3
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
model_name = "c3d"  # alexnet, scattering, c3d
layer = "conv2"  # chooses layer
num_frames_per_clip = 3  # for c3d only, must generate more than 1 clip!
device = args.device  # '/cpu:0', '/gpu:0'
data_dir = "/scratch/users/vision/reza/v4"
name = layer  # this can be anything

# indirect params
out_dir = oj("/scratch/users/vision/chandan/out",
             model_name + "_" + name + "_" + time.strftime("%b%d_%H:%M:%S"))
np.random.seed(13)

# choose model
ims, _ = data_utils.load_data(data_dir, im_range=im_ranges_list[0])
ims = data_utils.preprocess_data(ims=ims)
if model_name == "alexnet":  # alexnet alone
    from models.alexnet.alexnet_model import build_model

    placeholder, model = build_model(ims.shape[1:])
    model = model[layer]
elif model_name == "scattering":  # scattering alone
    from models.scattering.scattering_model import build_model

    ims = np.transpose(ims, (0, 3, 1, 2))  # convert NHWC -> NCHW
    placeholder, model = build_model(ims.shape[1:])

# extract features
for i in range(len(im_ranges_list)):
    im_range = im_ranges_list[i]
    ims, _ = data_utils.load_data(data_dir, im_range=im_range)
Example #4
    all_data = sio.loadmat(
        '/Users/ScottEnsel/Desktop/Deep Learning/Project/NEW files/Z_run-010_thumb_index_middle.mat',
        struct_as_record=False,
        squeeze_me=True)
    EMG_data = all_data['z']
    #
    # all_data = sio.loadmat(os.path.join(data_utils.DATA_DIR,data_utils.DATA_SET1), struct_as_record=False, squeeze_me=True)
    # EMG_data = all_data['z']

    THUMB_INDEX = 0
    INDEX_INDEX = 1
    MIDDLE_INDEX = 2
    RING_INDEX = 3
    PINKY_INDEX = 4
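    # one of these finger indices is passed to preprocess_data below to pick which digit's recording to use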

    # new_z = data_utils.preprocess_data(EMG_data, THUMB_INDEX)
    new_z = data_utils.preprocess_data(EMG_data, INDEX_INDEX)
    # new_z = data_utils.preprocess_data(EMG_data, MIDDLE_INDEX)

    y = new_z[:, 0]  # separate labels
    x = new_z[:, 1:]  # separate features
    #1:34

    # split must be less than 0.5
    x_train, x_test, y_train, y_test, y_kf_train_mean = data_split(x,
                                                                   y,
                                                                   split=0.04)

    training_mean = np.sum(y_train)

    x_hat = Kalman_filter(x_train, x_test, y_train, y_test)
Example #5
logger = get_basic_logger()


def round_pred(pred):
    if pred >= 0.5:
        return 1
    else:
        return 0


if __name__ == '__main__':
    config_path = "config/main_config.json"
    config = load_conf(config_path)
    train_df, test_df = load_data(config.data)
    train_df = preprocess_data(train_df, train=True)
    test_df = preprocess_data(test_df, train=False)
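    # 5-fold cross-validation accuracy on the training set, then fit the final booster on all training rows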
    accuracy = eval_booster(train_df)
    logger.info(f"Mean Accuracy over 5 folds: {accuracy}")
    booster = train_booster(train_df.drop(columns=["PassengerId"]))
    predictions = booster.predict(test_df.drop(columns=["PassengerId"]).values)
    predictions = [round_pred(pred) for pred in predictions]

    submission = pd.DataFrame({
        "PassengerId": test_df.PassengerId,
        "Survived": predictions
    })

    submission = submission.astype(int)
    submission.to_csv("submission.csv", index=False)