Beispiel #1
0
def split_and_build_class(X, y):
    """Split the data 70/30, impute and normalize features, wrap each split.

    Column 0 of ``X`` is left untouched; every other column has its NaN
    entries replaced by a column mean and each row is then rescaled with
    ``preprocessing.normalize``.

    The imputer is fitted on the training split only and the same fitted
    imputer is applied to the test split, so no statistics computed from
    the test data are used for preprocessing.

    Args:
        X: 2-D array indexable as ``X[:, 1:]``.
            (presumably column 0 is a timestamp -- TODO confirm)
        y: Targets, aligned with the rows of ``X``.

    Returns:
        A list ``[X_train, X_test, y_train, y_test, train_data, test_data]``
        where ``train_data`` / ``test_data`` are ``read_dataset.microData``
        objects loaded from the preprocessed splits, and ``y_train`` /
        ``y_test`` are whatever ``microData.set_output`` returns for the
        corresponding targets.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    print(X_train.shape)
    print(X_test.shape)

    # Normalize the input data.
    # Column means for imputation are learned from the training rows only.
    imp = preprocessing.Imputer(missing_values='NaN', strategy='mean', axis=0)
    imp.fit(X_train[:, 1:])

    # Use the value returned by normalize() rather than relying on the
    # in-place side effect of copy=False, which is not applied when the
    # input has to be converted/copied internally.
    fixed_X_train = imp.transform(X_train[:, 1:])
    X_train[:, 1:] = preprocessing.normalize(fixed_X_train, copy=False)

    # Re-use the imputer fitted on the training split; do not re-fit here.
    fixed_X_test = imp.transform(X_test[:, 1:])
    X_test[:, 1:] = preprocessing.normalize(fixed_X_test, copy=False)

    train_data = read_dataset.microData()
    train_data.get_data(X_train)
    y_train = train_data.set_output(y_train)

    test_data = read_dataset.microData()
    test_data.get_data(X_test)
    y_test = test_data.set_output(y_test)

    return [X_train, X_test, y_train, y_test, train_data, test_data]
def plot_one_feature(plot_files, all_file_param, dataset_path, feature):
    """Plot one feature over time for the selected datasets and save a PNG.

    For every entry of ``all_file_param`` whose ``data_name`` is listed in
    ``plot_files``, the CSV at ``dataset_path + file_path`` is loaded and
    consecutive samples are joined by a line segment, but only when the
    gap between them is within the dataset's sampling interval. Larger
    gaps are left blank so missing intervals stay visible.

    The figure is written to ``./micro_features_plot/<feature>.png``; the
    directory is not created here.

    NOTE(review): this file contains two definitions of
    ``plot_one_feature`` with the same signature; the later one is the one
    bound at import time.

    Args:
        plot_files: Collection of dataset names to plot; also used as the
            legend labels and in the figure title.
        all_file_param: Sequence of objects exposing ``data_name``,
            ``file_path`` and ``interval``. Must be non-empty, because
            its length is used as a divisor when picking the color.
        dataset_path: Prefix prepended to each ``file_path``.
        feature: Feature name passed to ``microData.read_feature``; also
            used in the title and the output file name.
    """
    file_amount = len(all_file_param)

    # Initiate the plot.
    cmap = plt.get_cmap('jet_r')
    plt.figure(figsize=(25, 10))

    # Color setup for single file: every segment uses this one color.
    color = cmap(float(9) / file_amount)

    # Traverse all the dataset.
    print('==========' + feature + '==========')
    for file_param in all_file_param:
        # Only plot the datasets the caller asked for.
        if file_param.data_name not in plot_files:
            continue

        print('Ploting ' + file_param.data_name + '...')
        df = pd.read_csv(dataset_path + file_param.file_path)
        all_data = read_dataset.microData()
        all_data.get_data(df.values)

        interval_minute = read_dataset.get_interval_minute(
            file_param.interval
        )
        times = all_data.data_time
        # Loop-invariant: read the feature series once per file instead of
        # once per plotted segment.
        values = all_data.read_feature(feature)

        # Starting at index 1 and looking back one step means a dataset
        # with zero or one timestamps simply produces no segments.
        for i in xrange(1, len(times)):
            prev_time = times[i - 1]
            current_time = times[i]
            diff = current_time - prev_time
            # Join the two samples only when they are less than a day
            # apart and the gap in minutes fits the sampling interval.
            if not diff.days and diff.seconds / 60 <= interval_minute:
                plt.plot(
                    [prev_time, current_time],
                    values[i - 1: i + 1],
                    c=color
                )

            if i % 1000 == 0:
                # Same output as the Python 2 statement
                # print 'Plotted ', i, ' lines...'
                print(' '.join(['Plotted ', str(i), ' lines...']))

        print('Plotted done!')

    # NOTE(review): each segment is its own line artist, so these labels
    # probably do not map one-to-one onto files -- confirm.
    plt.legend(plot_files)
    plt.suptitle(feature + ' in ' + str(plot_files))
    plt.savefig('./micro_features_plot/' + feature + '.png', bbox_inches='tight')
def plot_one_feature(plot_files, all_file_param, dataset_path, feature):
    """Draw a time-series plot of ``feature`` for the chosen datasets.

    Each entry of ``all_file_param`` whose ``data_name`` appears in
    ``plot_files`` is read from ``dataset_path + file_path``. Neighbouring
    samples are connected by a line segment only if the time between them
    does not exceed the dataset's sampling interval, so missing intervals
    show up as breaks in the curve.

    The result is saved as ``./micro_features_plot/<feature>.png``; the
    target directory is expected to exist already.

    NOTE(review): ``plot_one_feature`` is defined twice in this file with
    the same signature; this later definition overrides the earlier one.

    Args:
        plot_files: Names of the datasets to draw; reused as legend labels
            and in the figure title.
        all_file_param: Non-empty sequence of objects with ``data_name``,
            ``file_path`` and ``interval`` attributes (its length is used
            as a divisor for the color).
        dataset_path: Path prefix joined with each ``file_path``.
        feature: Name handed to ``microData.read_feature``; also used for
            the title and the output file name.
    """
    file_amount = len(all_file_param)

    # Initiate the plot.
    cmap = plt.get_cmap('jet_r')
    plt.figure(figsize=(25, 10))

    # Color setup for single file: one shared color for all segments.
    color = cmap(float(9) / file_amount)

    # Traverse all the dataset.
    print('==========' + feature + '==========')
    for file_param in all_file_param:
        # Skip datasets that were not requested by the caller.
        if file_param.data_name not in plot_files:
            continue

        print('Ploting ' + file_param.data_name + '...')
        df = pd.read_csv(dataset_path + file_param.file_path)
        all_data = read_dataset.microData()
        all_data.get_data(df.values)

        interval_minute = read_dataset.get_interval_minute(
            file_param.interval)
        times = all_data.data_time
        # Fetched once per file: the series does not depend on the loop
        # index, so there is no need to re-read it for every segment.
        values = all_data.read_feature(feature)

        # Looking back from index 1 avoids touching times[0] up front, so
        # an empty dataset yields no segments instead of an IndexError.
        for i in xrange(1, len(times)):
            prev_time = times[i - 1]
            current_time = times[i]
            diff = current_time - prev_time
            # Connect only samples less than a day apart whose gap in
            # minutes is within the sampling interval.
            if not diff.days and diff.seconds / 60 <= interval_minute:
                plt.plot([prev_time, current_time],
                         values[i - 1:i + 1],
                         c=color)

            if i % 1000 == 0:
                # Produces the same text as the Python 2 statement
                # print 'Plotted ', i, ' lines...'
                print(' '.join(['Plotted ', str(i), ' lines...']))

        print('Plotted done!')

    # NOTE(review): every segment is a separate line artist, so the labels
    # likely do not correspond one-to-one to files -- confirm.
    plt.legend(plot_files)
    plt.suptitle(feature + ' in ' + str(plot_files))
    plt.savefig('./micro_features_plot/' + feature + '.png',
                bbox_inches='tight')