Example No. 1
def generate_data():
    TRAIN_SET_DIR = '/Users/knight/Desktop/GodClassDetection/trainset/train1'  # change this to your own path
    train_x = preprocess.get_metrics(TRAIN_SET_DIR)
    print('train_x[0]:', train_x[0])
    train_y = preprocess.get_labels(TRAIN_SET_DIR)
    print('train_y[0]:', train_y[0])

    TEST_SET_DIR = '/Users/knight/Desktop/GodClassDetection/trainset/train2'  # change this to your own path
    test_x = preprocess.get_metrics(TEST_SET_DIR)
    test_y = preprocess.get_labels(TEST_SET_DIR)

    print('-------------train datasize:', len(train_x))
    print('train_x:\n', train_x)
    print('train_y:\n', train_y)
    print()
    print('-------------test datasize:', len(test_x))
    print('test_x:\n', test_x)
    print('test_y:\n', test_y)

    return train_x, train_y, test_x, test_y
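A minimal usage sketch for generate_data(): the classifier below is an illustrative choice, not part of the original, and it assumes this runs in the same module so that preprocess and the dataset paths above are available.

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

train_x, train_y, test_x, test_y = generate_data()

# illustrative classifier; any estimator accepting metric vectors works
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(train_x, train_y)
print('test accuracy:', accuracy_score(test_y, clf.predict(test_x)))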
Example No. 2
def plot_data_distribution(fraction=0.1):
    """
	Plotting distribution of sleep stages in data

	:param fraction: fraction of data to sample for the plot
	"""
    # Sampling from full dataset
    assert 0 < fraction <= 1
    all_xml = [
        f for f in os.listdir(C.RAW_XML_DIR)
        if os.path.isfile(C.RAW_XML_DIR + f)
    ]
    all_xml = np.array(all_xml)
    np.random.shuffle(all_xml)
    cutoff = int(fraction * len(all_xml))
    samples = all_xml[:cutoff]

    # Getting labels for all epochs from each patient
    n = C.FINAL_SAMPLING_FREQ * 30  # Rows per epoch
    all_labels = []
    for path in samples:
        labels = get_labels(C.RAW_XML_DIR + path)
        num_rows = len(labels)

        labels = np.reshape(
            labels,
            (num_rows // n, n, 1))  # Into shape (epoch, sequence, features)
        labels = labels[:, 0, 0]
        labels = labels.astype(np.int64)

        all_labels.extend(labels)

    # Computing
    scores, counts = np.unique(np.array(all_labels), return_counts=True)
    percentage = []

    print("Number of patients: {}".format(len(samples)))
    print("Number of epochs: {}:".format(len(all_labels)))
    print("Score, Counts, Percentage")
    for i in range(0, len(scores)):
        print("{}, {}, {}".format(scores[i], counts[i],
                                  counts[i] / len(all_labels) * 100))
        percentage.append(counts[i] / len(all_labels) * 100)

    # Plotting
    x_pos = np.arange(len(scores))
    x_labels = ['Wake', 'N1', 'N2', 'N3', 'REM']

    plt.figure()
    plt.bar(x_pos, percentage, align='center', alpha=0.5)
    plt.xticks(x_pos, x_labels)
    plt.title('Distribution of Sleep Stages')
    plt.ylabel('Percentage')
    plt.savefig(C.GRAPHS_DIR + "stage_distribution.png")
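The reshape-then-slice step above keeps one label per 30-second epoch. A self-contained toy run (the frequency value is illustrative, standing in for C.FINAL_SAMPLING_FREQ):

import numpy as np

freq = 4                             # toy sampling frequency (rows per second)
n = freq * 30                        # rows per 30-second epoch
labels = np.repeat([0, 2, 2, 4], n)  # row-level labels for 4 epochs

epochs = np.reshape(labels, (len(labels) // n, n, 1))
print(epochs[:, 0, 0])               # one label per epoch: [0 2 2 4]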
Example No. 3
def compress_audio_files(path):
    labels = get_labels(path)
    for label in labels:
        print("Compressing... ", label)
        # Collect every audio file under this label's directory
        audiofiles = list(os.listdir(path + '/' + label))
        for i, audiofile in enumerate(audiofiles, start=1):
            # optional size filter, left disabled in the original:
            # file_size = get_size_in_mega_bytes(path + "/" + label + "/" + audiofile)
            # if file_size > 0.45:
            convert_to_mp3(path=path,
                           label=label,
                           file_name=audiofile,
                           index=i)
Example No. 4
def predict(name):

    data = {"path": name}

    params = flask.request.json
    if params is None:
        params = flask.request.args

    # if parameters are found, return a prediction
    if params is not None:
        with graph.as_default():
            sample = preprocess.wav2mfcc('C://Users//Stage//Downloads//' +
                                         name + '.wav')
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # return a response in json format
    return flask.jsonify(data)
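A hypothetical client call for the endpoint above; the snippet does not show the @app.route decorator, so the URL pattern and port here are assumptions.

import requests

resp = requests.get('http://localhost:5000/predict/sample01')  # assumed route
print(resp.json())  # e.g. {"path": "sample01", "prediction": "...", "success": true}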
Example No. 5
def predictTest(name):

    data = {"path": name}

    params = flask.request.json
    if params is None:
        params = flask.request.args

    # if parameters are found, return a prediction
    if params is not None:
        with graph.as_default():
            dir = "C://Users//Stage//final project//test//" + name
            filename = random.choice(os.listdir(dir))
            print(filename)
            sample = preprocess.wav2mfcc(dir + "//" + filename)
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # return a response in json format
    return flask.jsonify(data)
Example No. 6
def prepare_model():
    print("Preparing model...")
    raw_train, raw_test = get_raw_data()
    tokenized_inputs = get_tokenized_inputs(raw_train, cols=in_features)

    labels = get_labels(raw_train)

    split_index_inputs = int(0.9 * tokenized_inputs.shape[0])
    split_index_labels = int(0.9 * labels.shape[0])
    train_inputs = tokenized_inputs[:split_index_inputs]
    train_labels = labels[:split_index_labels]

    test_inputs = tokenized_inputs[split_index_inputs:]
    test_labels = labels[split_index_labels:]

    train_loader = DataLoader(list(zip(train_inputs, train_labels)),
                              batch_size=hyperparams["batch_size"])
    test_loader = DataLoader(list(zip(test_inputs, test_labels)),
                             batch_size=hyperparams["batch_size"])

    model = AttnModel(hyperparams).to(device)

    return model, train_loader, test_loader
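A conventional training-loop sketch around prepare_model(), assuming AttnModel returns class logits and the labels are integer class indices; the optimizer and learning rate are illustrative choices, not from the original.

import torch
import torch.nn as nn

model, train_loader, test_loader = prepare_model()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # illustrative
criterion = nn.CrossEntropyLoss()

model.train()
for inputs, targets in train_loader:
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)  # logits vs. class indices
    loss.backward()
    optimizer.step()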
Example No. 7
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import numpy as np
import pandas as pd
import models, preprocess, mybert

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


# Load the datasets => preprocess with BERT => create the labels
test_data = pd.read_csv("gap-test.tsv", sep = '\t')
test_emb = mybert.run_bert(test_data[:])
test_labels = preprocess.get_labels(test_data[:])

validation_data = pd.read_csv("gap-validation.tsv", sep = '\t')
val_emb = mybert.run_bert(validation_data[:])
val_labels = preprocess.get_labels(validation_data[:])

development_data = pd.read_csv("gap-development.tsv", sep = '\t')
dev_emb = mybert.run_bert(development_data)
dev_labels = preprocess.get_labels(development_data[:])


submission_data = pd.read_csv("test_stage_2.tsv", sep = '\t')
sub_emb = mybert.run_bert(submission_data)


x_test = np.array(test_emb)
y_test = np.array(test_labels)
Example No. 8
from resnet import build_resnet
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights
from keras.utils import plot_model
from keras.optimizers import Nadam, Adamax, Adam, SGD, RMSprop
from sklearn import metrics
from sklearn.model_selection import train_test_split
import dpn
import time
import os

import preprocess as pre  # assumed alias: the `pre` module used below is not imported in the original

os.environ['CUDA_VISIBLE_DEVICES'] = '2,3,4,5'

train_df = pre.load_data('train.json')
images = pre.get_images(train_df)
labels = pre.get_labels(train_df)
del train_df

X_train, X_val, y_train, y_val = train_test_split(images,
                                                  labels,
                                                  test_size=0.2,
                                                  random_state=None)

img_gen = pre.images_generator()
print('Images generator initialized')

print('training images:', X_train.shape)
print('validation images:', X_val.shape)
print('training labels:', y_train.shape)
print('validation labels:', y_val.shape)
Example No. 9
def predict(filepath, model=None):  # predict an English word with the CNN
    sample = wav2mfcc(filepath)
    feature_dim_1, feature_dim_2, channel = 20, 11, 1
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][np.argmax(model.predict(sample_reshaped))]
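Hypothetical usage; 'model.h5' and the .wav path are placeholders, and the model is assumed to have been trained on (20, 11, 1) MFCC inputs.

from keras.models import load_model

model = load_model('model.h5')  # placeholder path
print(predict('samples/yes_001.wav', model=model))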
Example No. 10
def predict(filepath, model):
    # feature_dim_1, feature_dim_2 and channel are module-level globals here
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][np.argmax(model.predict(sample_reshaped))]
Example No. 11
    parser.add_argument("-l",
                        "--load",
                        action="store_true",
                        help="load model.pt")

    args = parser.parse_args()

    with open(args.data_file[0], "rb") as pickle_in:
        mols = pickle.load(pickle_in)

    # load NumPy files
    wiener_idx = np.load(args.data_file[1])
    hyper_wiener_idx = np.load(args.data_file[2])
    zagreb_idx = np.load(args.data_file[3])
    labels = get_labels(wiener_idx, hyper_wiener_idx, zagreb_idx)

    data = RegressionData(mols, labels)
    dataset = get_data(data)
    dgl_graphs = [build_graph(data) for data in dataset]

    dataset = list(zip(dgl_graphs, labels))
    train_dataset, valid_dataset, test_dataset = split_dataset(dataset,
                                                               [0.8, 0.1, 0.1],
                                                               shuffle=True)

    def collate(samples):
        graphs, labels = map(list, zip(*samples))
        batched_graph = dgl.batch(graphs)
        return batched_graph, torch.tensor(labels)
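The snippet defines collate() but never shows it wired in; a typical use passes it to a DataLoader (assuming torch.utils.data.DataLoader is imported; the batch size is illustrative), continuing at the same indentation:

    train_loader = DataLoader(train_dataset,
                              batch_size=32,
                              shuffle=True,
                              collate_fn=collate)  # each batch: one batched dgl graph plus a label tensor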
Example No. 12
def create_extra_data(path):
    #    compress_audio_files(path)
    labels = get_labels(path)
    for label in labels:
        print("Creating extra data for:", label)
        # Collect every audio file under this label's directory
        audiofiles = list(os.listdir(path + '/' + label))
        # Derive three augmented variants from each file
        for i, audiofile in enumerate(audiofiles, start=1):
            add_noise(path=path, label=label, file_name=audiofile, index=i)
            skip_n_seconds(path=path,
                           label=label,
                           file_name=audiofile,
                           index=i,
                           n=2)
            speed_up_audio(path=path,
                           label=label,
                           file_name=audiofile,
                           index=i,
                           speed=1.2)


#save_our_data_as_numpy_array(max_len = 80, max_len2 = 40,origin_path=my_data_set_path,destination_path=big_numpy_files_path)
#labels = get_labels(big_numpy_files_path)
#for label in labels:
#    x = np.load(big_numpy_files_path + '/' + label)
#    if x.shape[0]<150:
#        print(label, "Shape:",x.shape[0])

#path = our_data_set_path
#labels = get_labels(path)
#for label in labels:
#    print("Creating exta data for:",label)
#    audiofiles = []
#    for audiofile in os.listdir(path + '/' + label):
#        #current_path = path + '/' + label + '/' +audiofile
#        audiofiles.extend([audiofile])
#    index = np.random.randint(low=2,high=3)
#    audiofiles = audiofiles[::index]
#    i=1
#    for audiofile in audiofiles:
#        f = np.random.randint(low=0,high=2)
#        fade_audio(path=path,label=label,file_name=audiofile,index=i,fade=f)
#        i+=1

#path = our_data_set_path
#labels = get_labels(path)
#for label in labels:
##    print("Creating extra data for:",label)
#    audiofiles = []
#    for audiofile in os.listdir(path + '/' + label):
#        #current_path = path + '/' + label + '/' +audiofile
#        audiofiles.extend([audiofile])
#    i=20
#    for audiofile in audiofiles:
#        if audiofile[-10:-5] == 'noise':
#            skip_n_seconds(path=path,label=label,file_name=audiofile,index=i,n=2)
#        elif audiofile[:6] == 'sliced':
#            speed_up_audio(path=path,label=label,file_name=audiofile,index=i,speed=1.25)
#        elif audiofile[:6] == 'speedx':
#            add_noise(path=path,label=label,file_name=audiofile,index=i)
#        i+=1

#print_user_files(our_data_set_path)