def main(argv):
    """Fit a full PCA on a descriptor dataset, report the smallest number of
    components whose accumulated explained-variance ratio exceeds a threshold,
    and save two diagnostic plots (per-component ratio and accumulated ratio).

    Command-line arguments:
        -i  / --input_file        path handed to load_dataset()
        -t  / --threshold         accumulated-variance threshold in [0, 1]
        -o1 / --out_graph_1_file  output path for the per-component plot
        -o2 / --out_graph_2_file  output path for the accumulated plot
    """
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', help='input file', required=True)
    parser.add_argument('-t', '--threshold', help='variance threshold', required=True)
    parser.add_argument('-o1', '--out_graph_1_file', help='variance graph file name', required=True)
    parser.add_argument('-o2', '--out_graph_2_file', help='variance accumulated graph file name', required=True)
    ARGS = parser.parse_args()

    descriptors = load_dataset(ARGS.input_file)

    pca = PCA()
    pca.fit(descriptors)

    # Component counts are 1-based: index i of the ratio arrays maps to k = i + 1.
    ks = np.arange(pca.explained_variance_ratio_.size) + 1
    variance_accumulated = np.cumsum(pca.explained_variance_ratio_)

    # BUG FIX: np.argmax returns the 0-based index of the first component whose
    # accumulated variance exceeds the threshold; +1 converts it to the actual
    # number of components (the original under-reported k by one).
    k_ideal = np.argmax(variance_accumulated > float(ARGS.threshold)) + 1
    print(k_ideal)

    # Plot the per-component explained-variance ratio.
    fig = plt.figure()
    plt.plot(ks, pca.explained_variance_ratio_, 'bx')
    plt.axvline(x=k_ideal, color='r')
    plt.grid()
    plt.xlabel('k')
    plt.ylabel('Variance ratio')
    plt.title('PCA Variance Study')
    # plt.show()
    fig.savefig(ARGS.out_graph_1_file)

    # Plot the accumulated explained variance with the chosen k marked.
    fig = plt.figure()
    plt.plot(ks, variance_accumulated, 'bx')
    plt.axvline(x=k_ideal, color='r')
    plt.grid()
    plt.xlabel('k')
    plt.ylabel('Accumulated Variance')
    plt.title('PCA Variance Study')
    # plt.show()
    fig.savefig(ARGS.out_graph_2_file)
def prepare_data():
    """Load the Opportunity gesture dataset and return the windowed test split.

    Returns:
        (X_test, y_test) where X_test has been segmented into sliding windows
        and reshaped to the 4-D tensor layout the network consumes.
    """
    print("Loading data...")
    X_train, y_train, X_test, y_test = load_dataset(
        'data/oppChallenge_gestures.data')

    # Segment the continuous sensor stream into fixed-length sliding windows.
    X_test, y_test = opp_sliding_window(
        X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    print(" ..after sliding window (testing): inputs {0}, targets {1}".format(
        X_test.shape, y_test.shape))

    # The network's input is 4-D: (samples, 1, window length, sensor channels).
    windowed = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))
    return windowed, y_test
def test():
    """Evaluate a previously trained model (run TEST_MODEL_NUMBER) on the
    Opportunity test split and print the weighted F1 score.

    Side effects: reads the saved architecture JSON and weights from
    ./runs/<TEST_MODEL_NUMBER>/ and prints timing + score to stdout.
    """
    print("Loading data...")
    X_train, y_train, X_test, y_test = load_dataset('data/oppChallenge_gestures.data')
    assert NUM_SENSOR_CHANNELS == X_train.shape[1]

    # Sensor data is segmented using a sliding window mechanism.
    X_test, y_test = opp_sliding_window(X_test, y_test,
                                        SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    print(" ..after sliding window (testing): inputs {0}, targets {1}".format(
        X_test.shape, y_test.shape))

    # Data is reshaped since the input of the network is a 4 dimension tensor.
    X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))

    # Load the model architecture; 'with' closes the handle deterministically
    # (the original leaked it via a bare open().read()).
    with open('./runs/{}/model_pickle.json'.format(str(TEST_MODEL_NUMBER)), 'r') as f:
        json_string = f.read()
    model = models.model_from_json(json_string)

    # BUG FIX: the original compared strings with 'is' (object identity),
    # which is not a reliable equality test; use '==' instead.
    file_list = sorted(os.listdir('./runs/{}'.format(str(TEST_MODEL_NUMBER))))
    weights_file = file_list[0] if file_list[0] == 'model_1_weights_sub.h5' else file_list[-1]
    model.load_weights('./runs/{}/{}'.format(str(TEST_MODEL_NUMBER), weights_file))

    # Classification of the testing data.
    print("Processing {0} instances in mini-batches of {1}".format(X_test.shape[0], BATCH_SIZE))
    test_pred = np.empty(0)
    test_true = np.empty(0)
    start_time = time.time()
    for batch in iterate_minibatches(X_test, y_test, BATCH_SIZE):
        inputs, targets = batch
        y_pred = model.predict(inputs, batch_size=BATCH_SIZE)
        # BUG FIX: argmax over the class axis directly; the original appended a
        # map() object, which is lazy under Python 3 and corrupts np.append.
        test_pred = np.append(test_pred, y_pred.argmax(axis=1), axis=0)
        test_true = np.append(test_true, targets, axis=0)

    # Function-form prints keep this working on both Python 2 and 3
    # (the original used Python-2-only print statements).
    print("||Results||")
    print("\tTook {:.3f}s.".format(time.time() - start_time))
    import sklearn.metrics as metrics
    print("\tTest fscore:\t{:.4f} ".format(
        metrics.f1_score(test_true, test_pred, average='weighted')))
parser.add_argument('--gamma', type=float, default=0.1, nargs='?', help='LR is multiplied by gamma on schedule. Default 0.1') args = parser.parse_args() # Data print('==> Preparing data..') m_transforms = args.rt and [ transforms.RandomErasing(p=args.p, sl=args.sl, sh=args.sh, r1=args.r1) ] or [] print(m_transforms) trainloader, validloader, __, __ = load_dataset( directory=args.d, train_batch_size=args.train_batch, test_batch_size=args.test_batch, extra_transforms=m_transforms) # Model print('==> Building model..') netList = { 'LeNet': LeNet(), 'VGG19': VGG('VGG19'), 'PreActResNet18': PreActResNet18(), 'MobileNetV2': MobileNetV2(10), 'WRN_28_10':
def main(argv):
    """Sweep k for cosine-distance K-means over a descriptor dataset and save
    elbow (distortion) and silhouette plots.

    Command-line arguments:
        -i / --input_file            path handed to load_dataset()
        -s / --step, -ik / --init_k, -fk / --final_k   range(init_k, final_k, step)
        -od / --distortion_out_file  output path for the distortion plot
        -os / --silhouette_out_file  output path for the silhouette plot
        -pca / --pca                 flag: reduce with PCA first
        -k_pca / --k_pca             number of PCA components (required if --pca)
    """
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', help='input file', required=True)
    parser.add_argument('-s', '--step', help='step', required=True)
    parser.add_argument('-ik', '--init_k', help='K initial', required=True)
    parser.add_argument('-fk', '--final_k', help='K final', required=True)
    parser.add_argument('-od', '--distortion_out_file', help='elbow distortion graph file', required=True)
    parser.add_argument('-os', '--silhouette_out_file', help='elbow silhoutte graph', required=True)
    parser.add_argument('-pca', '--pca', help='with pca', action='store_true')
    parser.add_argument('-k_pca', '--k_pca', help='k pca')
    ARGS = parser.parse_args()

    descriptors = load_dataset(ARGS.input_file)

    # Optionally reduce dimensionality before clustering.
    # store_true flag: test truthiness, not '== True'.
    if ARGS.pca:
        print("With pca")
        pca = PCA(n_components=int(ARGS.k_pca))
        descriptors = pca.fit_transform(descriptors)

    ks = []
    distortions = []
    silhouettes = []
    for k in range(int(ARGS.init_k), int(ARGS.final_k), int(ARGS.step)):
        # NLTK K-means with cosine distance (sklearn's KMeans is Euclidean-only).
        kclusterer = KMeansClusterer(
            k, distance=nltk.cluster.util.cosine_distance)
        predictions = kclusterer.cluster(descriptors, assign_clusters=True)
        predictions = np.array(predictions)
        cluster_centers_ = np.array(kclusterer.means())

        # Mean cosine distance from each sample to its closest centroid.
        distortion = sum(
            np.min(distance.cdist(descriptors, cluster_centers_, 'cosine'),
                   axis=1)) / descriptors.shape[0]
        silhouette_score = metrics.silhouette_score(descriptors, predictions,
                                                    metric='cosine')

        distortions.append(distortion)
        silhouettes.append(silhouette_score)
        ks.append(k)
        print("k:", k, "distortion:", distortion,
              "Silhouette Coefficient", silhouette_score)

    # Plot the elbow with distortion
    fig = plt.figure()
    plt.plot(ks, distortions, 'bx-')
    plt.grid()
    plt.xlabel('k')
    plt.ylabel('Distortion')
    plt.title('The Elbow Method')
    fig.savefig(ARGS.distortion_out_file)

    # Plot the silhouette scores (the original comment mislabeled this plot).
    fig = plt.figure()
    plt.plot(ks, silhouettes, 'bx-')
    plt.grid()
    plt.xlabel('k')
    plt.ylabel('Silhouette Score')
    plt.title('Silhouette Score analysis')
    fig.savefig(ARGS.silhouette_out_file)
) if valid_auc > best_auc: best_auc = valid_auc save_checkpoint(model, optimizer, config.checkpoint_path, filename=f'best_model_{time_now}.pth.tar', auc=valid_auc) if config.save_every: if i % config.save_every == 0: save_checkpoint(model, optimizer, config.checkpoint_path) if __name__ == "__main__": global time_now time_now = strftime('%d_%b_%H_%M_%S') logger = make_logger(time_now) config = load_train_parameters() print_flags(config, logger) dataset = load_dataset() logger.info(f"Dataset size: {len(dataset)}") train_loader, valid_loader = split_train_valid(dataset, train_batch_size=config.batch_size, valid_batch_size=config.valid_batch_size, validation_split=0.2, shuffle_dataset=True) train(train_loader, valid_loader, config, logger)
def train():
    """Train the CNN+GRU gesture-recognition model on the Opportunity dataset
    (Keras 1 API: Convolution2D/border_mode/init, nb_epoch, consume_less).

    Side effects: creates ./runs/<timestamp>/ and writes the architecture JSON,
    per-epoch best checkpoints, and the final weights there.
    """
    print("Loading data...")
    X_train, y_train, X_test, y_test = load_dataset(
        'data/oppChallenge_gestures.data')
    assert NUM_SENSOR_CHANNELS == X_train.shape[1]

    # Sensor data is segmented using a sliding window mechanism.
    X_train, y_train = opp_sliding_window(X_train, y_train,
                                          SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    X_test, y_test = opp_sliding_window(X_test, y_test,
                                        SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    print(" ..after sliding window (testing): inputs {0}, targets {1}".format(
        X_test.shape, y_test.shape))

    # Data is reshaped since the input of the network is a 4 dimension tensor.
    X_train = X_train.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))
    X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))

    # Network: four conv layers, then two stacked GRUs, then softmax.
    # NOTE(review): each Convolution2D already applies activation='relu' and is
    # then wrapped in ELU() — a double activation; confirm this is intentional.
    inputs = Input(shape=(1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))
    conv1 = ELU()(Convolution2D(NUM_FILTERS, FILTER_SIZE, 1, border_mode='valid',
                                init='normal', activation='relu')(inputs))
    conv2 = ELU()(Convolution2D(NUM_FILTERS, FILTER_SIZE, 1, border_mode='valid',
                                init='normal', activation='relu')(conv1))
    conv3 = ELU()(Convolution2D(NUM_FILTERS, FILTER_SIZE, 1, border_mode='valid',
                                init='normal', activation='relu')(conv2))
    conv4 = ELU()(Convolution2D(NUM_FILTERS, FILTER_SIZE, 1, border_mode='valid',
                                init='normal', activation='relu')(conv3))
    # permute1 = Permute((2, 1, 3))(conv4)
    reshape1 = Reshape((8, NUM_FILTERS * NUM_SENSOR_CHANNELS))(conv4)
    gru1 = GRU(NUM_UNITS_LSTM, return_sequences=True, consume_less='mem')(reshape1)
    gru2 = GRU(NUM_UNITS_LSTM, return_sequences=False, consume_less='mem')(gru1)
    outputs = Dense(NUM_CLASSES, activation='softmax')(gru2)
    model = Model(input=inputs, output=outputs)

    # Save checkpoints under a fresh timestamped run directory.
    timestamp = str(int(time.time()))
    os.mkdir('./runs/%s/' % timestamp)
    checkpoint = ModelCheckpoint(
        './runs/%s/weights.{epoch:03d}-{val_acc:.4f}.hdf5' % timestamp,
        monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # BUG FIX: write the architecture JSON via a context manager so the file
    # handle is closed deterministically (the original leaked an open handle).
    json_string = model.to_json()
    with open('./runs/%s/model_pickle.json' % timestamp, 'w') as f:
        f.write(json_string)

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=NUM_EPOCHES,
              verbose=1, callbacks=[checkpoint],
              validation_data=(X_test, y_test))  # starts training
    model.save_weights('./runs/%s/model_1_weights_sub.h5' % timestamp)
def train():
    """Train the CNN+GRU gesture-recognition model on the Opportunity dataset
    (Keras 2 API: Conv2D, epochs, implementation=2, dropout between layers).

    Side effects: creates ./runs/<timestamp>/ and writes the architecture JSON,
    per-epoch best checkpoints, and the final weights there.
    """
    print("Loading data...")
    X_train, y_train, X_test, y_test = load_dataset(
        'data/oppChallenge_gestures.data')
    assert NUM_SENSOR_CHANNELS == X_train.shape[1]

    # Sensor data is segmented using a sliding window mechanism.
    X_train, y_train = opp_sliding_window(X_train, y_train,
                                          SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    X_test, y_test = opp_sliding_window(X_test, y_test,
                                        SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    print(" ..after sliding window (testing): inputs {0}, targets {1}".format(
        X_test.shape, y_test.shape))

    # Data is reshaped since the input of the network is a 4 dimension tensor.
    X_train = X_train.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))
    X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))

    # Network: four 'same'-padded conv layers, dropout, two stacked GRUs, softmax.
    inputs = Input(shape=(1, SLIDING_WINDOW_LENGTH, NUM_SENSOR_CHANNELS))
    conv1 = ELU()(Conv2D(filters=NUM_FILTERS, kernel_size=(1, FILTER_SIZE),
                         strides=(1, 1), padding='same',
                         kernel_initializer='random_normal',
                         data_format='channels_last')(inputs))
    conv2 = ELU()(Conv2D(filters=NUM_FILTERS, kernel_size=(1, FILTER_SIZE),
                         strides=(1, 1), padding='same',
                         kernel_initializer='random_normal',
                         data_format='channels_last')(conv1))
    conv3 = ELU()(Conv2D(filters=NUM_FILTERS, kernel_size=(1, FILTER_SIZE),
                         strides=(1, 1), padding='same',
                         kernel_initializer='random_normal',
                         data_format='channels_last')(conv2))
    conv4 = ELU()(Conv2D(filters=NUM_FILTERS, kernel_size=(1, FILTER_SIZE),
                         strides=(1, 1), padding='same',
                         kernel_initializer='random_normal',
                         data_format='channels_last')(conv3))
    reshape1 = Reshape((SLIDING_WINDOW_LENGTH, NUM_FILTERS * 1))(conv4)
    dropout1 = Dropout(DROPOUT_RATE)(reshape1)
    gru1 = GRU(NUM_UNITS_LSTM, return_sequences=True, implementation=2)(dropout1)
    dropout2 = Dropout(DROPOUT_RATE)(gru1)
    gru2 = GRU(NUM_UNITS_LSTM, return_sequences=False,
               implementation=2)(dropout2)  # implementation=2 for GPU
    dropout3 = Dropout(DROPOUT_RATE)(gru2)
    outputs = Dense(NUM_CLASSES, activation=K.softmax,
                    activity_regularizer=l2())(dropout3)
    model = Model(inputs=inputs, outputs=outputs)

    # Save checkpoints under a fresh timestamped run directory.
    timestamp = str(int(time.time()))
    os.mkdir('./runs/%s/' % timestamp)
    checkpoint = ModelCheckpoint(
        './runs/%s/weights.{epoch:03d}-{val_acc:.4f}.hdf5' % timestamp,
        monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # BUG FIX: write the architecture JSON via a context manager so the file
    # handle is closed deterministically (the original leaked an open handle).
    json_string = model.to_json(indent=4)
    with open('./runs/%s/model_pickle.json' % timestamp, 'w') as f:
        f.write(json_string)

    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # rmsprop = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-8)
    model.compile(optimizer=adam,
                  loss='sparse_categorical_crossentropy', metrics=['acc'])
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCHES,
              verbose=1, callbacks=[checkpoint],
              validation_data=(X_test, y_test))  # starts training
    model.save_weights('./runs/%s/model_1_weights_sub.h5' % timestamp)
def main(argv):
    """Cluster PCA-reduced descriptors with cosine K-means, print distortion and
    silhouette scores, list each cluster medoid's five nearest medoids, and
    report a per-cluster label-purity metric.

    Command-line arguments:
        -i / --input_file      path handed to load_dataset()
        -ids / --ids_file      path handed to get_hash_ids()
        -n_components          number of PCA components
        -k                     number of K-means clusters
    """
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', help='input file', required=True)
    parser.add_argument('-ids', '--ids_file', help='ids file', required=True)
    parser.add_argument('-n_components', '--n_components',
                        help='number of components in pca', required=True)
    parser.add_argument('-k', '--k', help='k of kmeans', required=True)
    ARGS = parser.parse_args()

    descriptors = load_dataset(ARGS.input_file)
    ids_list, news_groups = get_hash_ids(ARGS.ids_file)

    print("PCA")
    pca = PCA(n_components=int(ARGS.n_components))
    descriptors = pca.fit_transform(descriptors)

    print("Kmeans")
    # NLTK K-means with cosine distance (sklearn's KMeans is Euclidean-only).
    kclusterer = KMeansClusterer(int(ARGS.k),
                                 distance=nltk.cluster.util.cosine_distance)
    predictions = np.array(
        kclusterer.cluster(descriptors, assign_clusters=True))
    cluster_centers_ = np.array(kclusterer.means())

    print("Distortions")
    # Mean cosine distance from each sample to its closest centroid.
    distortion_cos = sum(
        np.min(distance.cdist(descriptors, cluster_centers_, 'cosine'),
               axis=1)) / descriptors.shape[0]

    print("Silhouettes")
    silhouette_score_cos = metrics.silhouette_score(descriptors, predictions,
                                                    metric='cosine')
    print("COS K:", ARGS.k, "distortion:", distortion_cos,
          "silhouette score:", silhouette_score_cos)

    # Medoid = the real sample closest to each centroid.
    closest, _ = pairwise_distances_argmin_min(cluster_centers_, descriptors)
    medoids_ids = ids_list[closest]
    medoids = descriptors[closest]

    # Five nearest medoids of every medoid (column 0 is the medoid itself).
    dist = distance.cdist(medoids, medoids, metric='cosine')
    knns = dist.argsort(axis=1)[:, :6][:, 1:]
    for id_, knn in zip(medoids_ids, knns):
        print("\nMedoid id:", id_, "label:", news_groups[id_])
        print("Cercanos:")
        for nn in knn:
            print("\t id:", medoids_ids[nn],
                  "labels:", news_groups[medoids_ids[nn]])

    # Per-cluster purity: fraction of label occurrences matching the dominant
    # label (plus labels sharing a dotted-name component with it).
    metric = []
    # BUG FIX: iterate over the actual number of clusters instead of the
    # hard-coded 225 left over from one experiment.
    for i in range(int(ARGS.k)):
        ids_l = ids_list[np.where(predictions == i)]
        # Guard against empty clusters, which would make con[0] raise.
        if len(ids_l) == 0:
            continue
        clusters_labels = []
        for id_l in ids_l:
            for ll in news_groups[id_l]:
                clusters_labels.append(ll)
        uni, con = np.unique(np.array(clusters_labels), return_counts=True)
        # Sort labels by descending frequency; uni[0] is the dominant label.
        ind = np.argsort(con)[::-1]
        uni = uni[ind]
        con = con[ind]
        maxim = con.sum()
        cont = con[0]
        label = uni[0]
        uni = uni[1:]
        con = con[1:]
        marker = np.zeros(uni.shape)
        # Credit labels that share any dotted-name component with the dominant
        # label, counting each label at most once via the marker array.
        for s in label.split('.'):
            for j in range(uni.shape[0]):
                if marker[j] == 0 and s in uni[j]:
                    cont += con[j]
                    marker[j] = 1
        metric.append(cont / maxim)

    # BUG FIX: np.float was removed in NumPy 1.24; use the builtin float.
    metric = np.array(metric, dtype=float)
    print("mean:", metric.mean())
    print("std:", metric.std())
    print("median:", np.median(metric))
    print("Min:", np.min(metric))
    print("Max:", np.max(metric))
    return 0
import time from core.cnn import AudioCNN from torch.utils.data import DataLoader from utils.train_utils import * from utils.load_dataset import load_dataset MAX_TRACKS = 1000 config = load_train_parameters() start_time = time.time() transformed_dataset = load_dataset(config.data_path) print(f"Dataset size: {len(transformed_dataset)}") train_dl = torch.utils.data.DataLoader(transformed_dataset, batch_size=config.batch_size, shuffle=True) n_batches = len(train_dl) writer = SummaryWriter('tsne_embedding_runs', comment='tsne_embedding') model = AudioCNN() # Load checkpoint if config.checkpoint: checkpoint = torch.load(config.checkpoint) model.load_state_dict(checkpoint['model']) print("Checkpoint loaded") if torch.cuda.is_available():
from utils.visualize import subplot
import matplotlib.cm as cm

# Selects which dataset the PCA demo runs on: "att" faces or "mnist" digits.
# TEST_SET = "att"
TEST_SET = "mnist"

if __name__ == "__main__":
    ######################################################################
    # Load DataSet
    # DataSet CopyRight : AT&T Laboratories Cambridge
    # http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
    ######################################################################
    if TEST_SET == "att":
        file_path = "../../DataSet/PCA/orl_faces"
        X, T, shape = load_dataset(TEST_SET, file_path)
    ######################################################################
    # Load DataSet
    # DataSet CopyRight : New York Univ. Google Labs.
    # http://yann.lecun.com/exdb/mnist/
    ######################################################################
    else:
        file_path = "../../DataSet/PCA/mnist/"
        # NOTE(review): only digits 2 and 8 of the training split are loaded —
        # presumably a deliberate two-class subset; confirm against pca()'s use.
        X, T, shape = load_dataset(TEST_SET, file_path, set="training",
                                   selecteddigits=[2, 8])

    # pca() presumably returns eigenvalues, eigenvectors and the data mean —
    # TODO confirm against its definition (not visible here).
    [eigen_value, eigen_vector, mean] = pca(X, T)
    ######################################################################
) parser.add_argument('--output', dest='output', action='store_const', const=True, help="Whether to print the result report to file.") parser.add_argument( '--outfile', default='./results/svm/report.txt', nargs='?', help="File to save the result report. Default = './results/svm/report.txt'" ) args = parser.parse_args() # Get dataset and split into train and test __, __, train, test = load_dataset(directory=args.d, transform=False) train_x = train.train_data train_x = train_x.reshape((train_x.shape[0], -1)) train_y = np.asarray(train.train_labels) test_x = test.test_data test_x = test_x.reshape((test_x.shape[0], -1)) test_y = np.asarray(test.test_labels) print(train_x.shape, train_y.shape, test_x.shape, test_y.shape) # Whiten pca1 = PCA(args.pca_percent, whiten=True) train_x = pca1.fit_transform(train_x) test_x = pca1.transform(test_x) print('Shape of train dataset: {}'.format(train_x.shape))