コード例 #1
0
# X, Y, err, names = datasets.read_dan_data()
# n_bins = 64
# if args.use_restricted_bins:
#     X = np.take(X, range(17, 34)+range(n_bins+12, n_bins+17), axis=1)

# Read the requested dataset (unshuffled) and remember how many leading
# columns are retained when args.thermal_only is set: 64 for the equatorial
# data, 161 for the polar data.
leading_cols = None
if args.dataset == 'equatorial':
    X, Y = datasets.read_acs_grid_data(shuffle=False)
    leading_cols = 64
elif args.dataset == 'polar':
    X, Y = datasets.read_polar_data(shuffle=False)
    leading_cols = 161

# Column restriction is applied only when one of the branches above ran.
if leading_cols is not None and args.thermal_only:
    X = np.take(X, range(leading_cols), axis=1)

# Optional count normalization via the project helper.
if args.normalize:
    X = datasets.normalize_counts(X)

# Fit the PCA model on X, then project the same X into PC space.
pca = PCA(n_components=args.n_components)
transformed = pca.fit(X).transform(X)

# Plot clusters in PC space: first 3-D figure, axes labelled '1', '2', '3'.
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, projection='3d')
for label_setter, axis_name in ((ax1.set_xlabel, '1'),
                                (ax1.set_ylabel, '2'),
                                (ax1.set_zlabel, '3')):
    label_setter(axis_name)

# Second, separate 3-D figure.
fig2 = plt.figure()
ax2 = fig2.add_subplot(1, 1, 1, projection='3d')
コード例 #2
0
ファイル: knn.py プロジェクト: hannah-rae/dan_analysis
# Integer option: how many principal components the PCA keeps (default 3).
parser.add_argument(
    '--n_components', type=int, default=3,
    help='number of principal components to use for PCA')
# Boolean switch (False unless passed) that enables the reduced bin subset.
parser.add_argument(
    '--use_restricted_bins', action='store_true',
    help='only use bins 18-34 and 13-17 for thermal and epithermal')
args = parser.parse_args()

# Load the data sets: the model grid (X, y) and the DAN measurements.
# The third value returned by read_dan_data() is not used here.
X, y = datasets.read_acs_grid_data()
dan_X, dan_y, _, names = datasets.read_dan_data()

# Normalize counts to approximately same range
X = datasets.normalize_counts(X)
dan_X = datasets.normalize_counts(dan_X)

if args.use_restricted_bins:
    n_bins = 64
    # 0-based columns 17..33 and n_bins+12..n_bins+16; the option's help text
    # describes these as bins 18-34 and 13-17 (thermal and epithermal).
    # Built once so both arrays are guaranteed to use the same columns, and
    # wrapped in list() because range objects cannot be concatenated with
    # `+` on Python 3.
    # NOTE(review): assumes the second group of bins starts at column n_bins
    # -- confirm against the layout produced by the datasets readers.
    restricted_cols = (list(range(17, 34)) +
                       list(range(n_bins + 12, n_bins + 17)))
    X = np.take(X, restricted_cols, axis=1)
    dan_X = np.take(dan_X, restricted_cols, axis=1)

# Project the data into principal subspace of model data: the PCA is fit on
# the model grid only, and the DAN data is projected with that same fit.
pca = PCA(n_components=args.n_components)
pca.fit(X)
X = pca.transform(X)
dan_X = pca.transform(dan_X)
コード例 #3
0
ファイル: regression.py プロジェクト: hannah-rae/dan_analysis
        # Training data comes from the ACS grid; test data is the DAN data
        # read with limit_2000us=False and label_source='asu'.
        X, Y = datasets.read_acs_grid_data()
        # NOTE(review): the third return value is named Y_chi2 here but
        # Y_test_error in the 'both' branch below -- confirm which name the
        # later code expects.
        X_test, Y_test, Y_chi2, test_names = datasets.read_dan_data(limit_2000us=False, label_source='asu')
        # One fewer than the number of entries in datasets.time_bins_dan
        # (presumably these are bin edges -- confirm).
        n_bins = len(datasets.time_bins_dan)-1
    elif args.model_grid == 'both':
        # Training set is the concatenation of the simulated data (read with
        # use_dan_bins=True) and the grid data (read with limit_2000us=True).
        X_full, Y_full = datasets.read_sim_data(use_dan_bins=True)
        X_rover, Y_rover = datasets.read_grid_data(limit_2000us=True)
        X = np.concatenate([X_full, X_rover])
        Y = np.concatenate([Y_full, Y_rover])
        X_test, Y_test, Y_test_error, test_names = datasets.read_dan_data(limit_2000us=True)
        # Hard-coded bin count for this branch.
        n_bins = 34
    # Whichever branch ran, its X/Y become the training set.
    X_train = X
    Y_train = Y
    # Number of test rows.
    n_test = X_test.shape[0]

# Normalize counts to approximately same range
X_train = datasets.normalize_counts(X_train)
X_test = datasets.normalize_counts(X_test)

# DAN bins have some count overlap in the early bins
# between CTN (total neutrons) and CETN, leading to
# negative thermal counts in the early bins
if args.ignore_early_bins:
    # Drop the first 5 columns of each n_bins-wide half. The index list is
    # built once so train and test always use identical columns; list() is
    # needed because range objects cannot be concatenated on Python 3.
    late_cols = list(range(5, n_bins)) + list(range(n_bins + 5, n_bins * 2))
    X_train = np.take(X_train, late_cols, axis=1)
    X_test = np.take(X_test, late_cols, axis=1)

# These bins demonstrate the most dynamic range with respect to changing
# subsurface geochemistry: 18-34 for CTN and 13-17 for CETN
if args.use_restricted_bins:
    # NOTE(review): if args.ignore_early_bins was also set, the arrays have
    # already lost columns, so these indices address the trimmed layout and
    # no longer line up with the bin numbers quoted above -- confirm whether
    # the two options are meant to be combined.
    restricted_cols = (list(range(17, 34)) +
                       list(range(n_bins + 12, n_bins + 17)))
    X_train = np.take(X_train, restricted_cols, axis=1)
    X_test = np.take(X_test, restricted_cols, axis=1)
# Parenthesized single-argument form prints the same text on Python 2 and 3.
print(X_train.shape)
コード例 #4
0
                    type=int,
                    default=3,
                    help='number of principal components to use for PCA')
# Boolean switch (False unless passed) that turns on count normalization.
parser.add_argument(
    '--normalize', action='store_true',
    help='normalize the data by dividing each bin by total counts')
args = parser.parse_args()

# Prepare the plot: a 3x2 grid of panels sharing both the x and y axes.
fig, axes = plt.subplots(nrows=3, ncols=2, sharex=True, sharey=True)

# Plot the principal components of full grid data.
# Only the first value returned by read_sim_data() is used.
data, _ = datasets.read_sim_data()
if args.normalize:
    data = datasets.normalize_counts(data)

# Three components are always fitted here, regardless of args.n_components.
pca = PCA(n_components=3).fit(data)

# Each component is truncated to as many entries as there are values in
# datasets.time_bins_sim so it can be drawn against them as a step curve.
sim_time_bins = datasets.time_bins_sim
n_sim_time_bins = len(sim_time_bins)
top_left = axes[0, 0]
top_left.step(sim_time_bins, pca.components_[0, :n_sim_time_bins],
              where='post', linewidth=2, label='PC 1')
top_left.step(sim_time_bins, pca.components_[1, :n_sim_time_bins],
              where='post', linewidth=2, label='PC 2')
axes[0, 0].step(datasets.time_bins_sim,
                pca.components_[2][:len(datasets.time_bins_sim)],
                where='post',
コード例 #5
0
                    help='number of principal components to use for PCA')
# Boolean switch (False unless passed): normalize the data before PCA.
parser.add_argument(
    '--normalize', action='store_true',
    help='normalize the data before PCA')
args = parser.parse_args()

# Load the ACS grid ("Sebina") data and the DAN data, printing the shape of
# each label array as a quick sanity check. The parenthesized
# single-argument print produces the same output on Python 2 and Python 3.
X_sebina, Y_sebina = datasets.read_acs_grid_data()
print(Y_sebina.shape)
X_dan, Y_dan, err_dan, names_dan = datasets.read_dan_data()
print(Y_dan.shape)

# Not used in the statements below; kept because later code may read them.
time_bins = datasets.time_bins_dan
n_bins = 64

if args.normalize:
    X_sebina = datasets.normalize_counts(X_sebina)
    X_dan = datasets.normalize_counts(X_dan)

# The PCA is fit on the Sebina grid data only; X_t is that same data
# projected into PC space.
pca = PCA(n_components=args.n_components)
X_t = pca.fit_transform(X_sebina)

# Plot the Sebina grid points in PC space: one 3-D axes whose x/y/z axes are
# labelled with the first three principal components.
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')
for label_setter, pc_name in ((ax1.set_xlabel, 'PC 1'),
                              (ax1.set_ylabel, 'PC 2'),
                              (ax1.set_zlabel, 'PC 3')):
    label_setter(pc_name)
for x_t, (h, acs) in zip(X_t, Y_sebina):

    exists = False
    for [h_dan, acs_dan] in Y_dan: