Code example #1
# Assumes module-level imports and globals from the surrounding script:
# numpy as np, InputSig, get_input_X_y, LearnSig, start_row, n_processes, algo.
def simu_fit_test(arma_params,
                  ll,
                  order,
                  n_train_samples,
                  n_test_samples,
                  data='arma',
                  embedding='lead_lag'):
    """Train a signature-based model on simulated data and return its L2
    test error."""
    inputSig = InputSig(data, embedding, order, ll=ll, arma_params=arma_params)

    # Compute signature features for the training set and rescale each
    # coefficient by its maximum absolute value over the training data.
    train_X, train_y = get_input_X_y(inputSig,
                                     n_train_samples,
                                     start_row,
                                     n_processes=n_processes)
    max_train_X = np.amax(np.absolute(train_X), axis=0)
    train_X = train_X / max_train_X

    # Test signatures are rescaled with the training maxima to avoid leakage.
    test_X, test_y = get_input_X_y(inputSig,
                                   n_test_samples,
                                   start_row + n_train_samples,
                                   n_processes=n_processes)
    test_X = test_X / max_train_X

    # Fit and evaluate the algorithm
    learnSig = LearnSig(algo, inputSig, n_processes=n_processes)
    learnSig.train(train_X, train_y)
    test_results = learnSig.evaluate(test_X, test_y, metrics=['error_l2'])

    return test_results['error_l2']
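
A minimal calling sketch, assuming the module-level globals (start_row, n_processes, algo) are already set; the arma_params format and all values below are placeholders, not taken from the original project:

# Hypothetical call; the (AR, MA) coefficient format of arma_params is an
# assumption, not confirmed by the source.
arma_params = ([0.5], [0.3])
l2_error = simu_fit_test(arma_params, ll=2, order=4,
                         n_train_samples=500, n_test_samples=100)
print(l2_error)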
Code example #2
def save_normalization_vector():
    """Scan the Quick, Draw! data in batches and save the maximum absolute
    value of each signature coefficient, later used to normalize signatures.
    Assumes module-level globals: order, ll, n_tot, n_processes."""
    inputSig = InputSig('quick_draw', 'lead_lag', order, ll=ll)

    batch_size = 64 * 340
    n_iterations = n_tot // batch_size
    print(n_iterations)
    start_row = 0
    print(inputSig.get_sig_dimension())
    max_SigX = np.zeros(inputSig.get_sig_dimension())
    print(max_SigX.shape)
    for i in range(n_iterations):
        print(start_row)
        SigX, train_y = get_input_X_y(
            inputSig, batch_size, start_row, n_processes=n_processes)
        # Running coordinatewise maximum of |signature| over all batches.
        max_SigX = np.maximum(np.max(np.absolute(SigX), axis=0), max_SigX)
        start_row += batch_size
        print(max_SigX[15])
        print(np.max(np.absolute(SigX), axis=0)[15])

    np.save('norm_vec_quick_draw_generator.npy', max_SigX)
    return max_SigX
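
Downstream scripts load this saved vector and divide signature matrices by it so that every coefficient lies in [-1, 1], as in examples #4 and #8:

norm_vec = np.load('norm_vec_quick_draw_generator.npy')
test_X = test_X / norm_vec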
Code example #4
# The original definition line is truncated in the source; the name and
# signature below are reconstructed from the function body, which computes
# the mean average precision at k (MAP@k).
def map_at_k(y_true, y_pred, k):
    """Return the mean average precision at k: each sample scores 1/(j+1)
    when its true label first appears at position j (0-indexed) among the
    top k predictions, and 0 if it does not appear at all.
    """
    n = np.shape(y_true)[0]
    score = 0
    for i in range(n):
        continue_var = True
        for j in range(k):
            if (y_true[i] in y_pred[i, :(j + 1)]) and continue_var:
                score += 1 / (j + 1)
                continue_var = False
    return score / n
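
A tiny worked check of the metric (using the reconstructed name above): with three samples whose true labels appear at ranks 1, 2, and nowhere in the top 3, the score is (1 + 1/2 + 0) / 3 = 0.5.

y_true = np.array(['cat', 'dog', 'bird'])
y_pred = np.array([['cat', 'fish', 'dog'],   # true label at rank 1 -> 1
                   ['fish', 'dog', 'cat'],   # true label at rank 2 -> 1/2
                   ['cat', 'dog', 'fish']])  # true label absent    -> 0
print(map_at_k(y_true, y_pred, 3))  # 0.5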


print("Get input sig ")
inputSig=InputSig('quick_draw','lead_lag',6,ll=1)

test_X,test_y=get_input_X_y(
			inputSig,n_test_samples,first_row+n_max_train_samples+n_valid_samples,
			n_processes=n_processes)
norm_vec=np.load('norm_vec_quick_draw_generator.npy')
test_X=test_X/norm_vec


print("Predict")
pred_y_cat=model.predict(test_X,batch_size=batch_size)
top_3_pred =np.vstack([inputSig.word_encoder.classes_[np.argsort(-1*c_pred)[:3]] for c_pred in pred_y_cat])

test_labels=inputSig.word_encoder.inverse_transform(test_y)

print(test_labels.shape)
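
The decoded labels and top-3 predictions can then be scored with the MAP@3 metric defined in this example (using the reconstructed function name from above):

print(map_at_k(test_labels, top_3_pred, 3))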
Code example #5
# The opening `if data == ...:` branch of this chain is truncated in the
# source; its dataset name is not recoverable.
    n_valid_samples = 30
    n_train_samples = 300
elif data == 'urban_sound':
    n_test_samples = 500
    n_valid_samples = 500
    n_train_samples = 4435
elif data == 'quick_draw':
    n_train_samples = 200 * 340
    n_valid_samples = 20 * 340
    n_test_samples = 20 * 340

start_row = 0
n_processes = 32

# Load input data
inputSig = InputSig(data, embedding, order, ll=ll)

train_X, train_y = get_input_X_y(inputSig,
                                 n_train_samples,
                                 start_row,
                                 n_processes=n_processes)
max_train_X = np.amax(np.absolute(train_X), axis=0)
train_X = train_X / max_train_X

valid_X, valid_y = get_input_X_y(inputSig,
                                 n_valid_samples,
                                 start_row + n_train_samples,
                                 n_processes=n_processes)
valid_X = valid_X / max_train_X

# The snippet is cut off here in the source; the call is completed below
# following the train/valid pattern above.
test_X, test_y = get_input_X_y(inputSig,
                               n_test_samples,
                               start_row + n_train_samples + n_valid_samples,
                               n_processes=n_processes)
test_X = test_X / max_train_X
Code example #6
# The definition of order_dict is truncated here; from its use below it maps
# str(dyadic_level) to a list of signature truncation orders for that level.

results_df = pd.DataFrame({
    'accuracy': [],
    'embedding': [],
    'algo': [],
    'order': [],
    'dyadic_level': [],
    'n_features': []
})

# Load input data
for dyadic_level in dyadic_level_list:
    print(dyadic_level)
    for order in order_dict[str(dyadic_level)]:
        inputSig = InputSig(data, embedding, order)

        train_X, train_y = get_input_X_y(inputSig,
                                         n_train_samples,
                                         start_row,
                                         n_processes=n_processes,
                                         dyadic_level=dyadic_level)
        max_train_X = np.amax(np.absolute(train_X), axis=0)
        train_X = train_X / max_train_X

        valid_X, valid_y = get_input_X_y(inputSig,
                                         n_valid_samples,
                                         start_row + n_train_samples,
                                         n_processes=n_processes,
                                         dyadic_level=dyadic_level)
        valid_X = valid_X / max_train_X
Code example #7
# The opening of this statement is truncated in the source; by analogy with
# example #6 it is reconstructed as an empty results DataFrame.
results_df = pd.DataFrame({
    'accuracy': [],
    'embedding': [],
    'algo': [],
    'order': [],
    'n_features': []
})
for metric in metrics:
    results_df[metric] = []
print(results_df)

# Load input data
for embedding in embedding_list:
    for order in order_dict[embedding]:
        inputSig = InputSig(data,
                            embedding,
                            order,
                            ll=ll,
                            arma_params=arma_params)

        train_X, train_y = get_input_X_y(inputSig,
                                         n_train_samples,
                                         start_row,
                                         n_processes=n_processes)
        max_train_X = np.amax(np.absolute(train_X), axis=0)
        train_X = train_X / max_train_X

        valid_X, valid_y = get_input_X_y(inputSig,
                                         n_valid_samples,
                                         start_row + n_train_samples,
                                         n_processes=n_processes)
        valid_X = valid_X / max_train_X
Code example #8
# Imports needed to run this example (assumed from the context: a Keras
# Sequence-based generator with scikit-learn label encoding).
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import Sequence, to_categorical


class QuickDrawGenerator(Sequence):
    ''' The object generating batches to train a neural network. For each
    batch, it loads the data, embeds it and computes signatures. At each
    epoch, n_samples samples are taken randomly among n_max_samples.

    Parameters
    ----------
    n_samples: int
        Number of samples per epoch. n_samples//340 are selected for each of
        the 340 classes.

    n_max_samples: int
        Number of samples among which n_samples are randomly selected.
        n_max_samples must be larger than n_samples.

    batch_size: int
        The batch size on which each gradient descent step is done.

    first_row: int
        For each class, first_row//340 is the first row from which samples
        are taken.

    cache: dict
        A dictionary with all csv files loaded as pandas data frames to speed
        up file reading. The keys are integers from 0 to 339 and cache[i] is
        one pandas data frame corresponding to one class.

    order: int, default=6
        The order of truncation of the signature.

    Attributes
    ----------
    word_encoder: object of class LabelEncoder
        Stores the classes of the Quick, Draw! dataset.

    inputSig: object of class InputSig
        An instance of InputSig that computes signatures and contains
        information about the embedding and signatures.

    norm_vec: array, shape (p,)
        Normalization vector saved by the script
        preprocessing_generator_quick_draw that contains the maximum of each
        signature coefficient over the data. Signatures are divided by
        norm_vec so that every coefficient lies in [-1, 1].
    '''
    def __init__(self,
                 n_samples,
                 n_max_samples,
                 batch_size,
                 first_row,
                 cache,
                 order=6):
        self.word_encoder = LabelEncoder()
        self.word_encoder.classes_ = np.load('classes_quick_draw.npy',
                                             allow_pickle=True)

        print("Number of classes", len(self.word_encoder.classes_))
        self.batch_size = batch_size
        # Per-class quantities: totals are spread evenly over the 340 classes.
        self.first_row = first_row // 340
        self.n_samples = n_samples // 340
        self.n_max_samples = n_max_samples // 340

        self.cache = cache

        self.inputSig = InputSig('quick_draw', 'lead_lag', order, ll=1)
        self.norm_vec = np.load('norm_vec_quick_draw_generator.npy')

        self.on_epoch_end()

    def __len__(self):
        """ Returns the number of steps made in each epoch, that is the
        number of batches.
        """
        # np.int is removed in recent NumPy; use the built-in int instead.
        return int(np.ceil(self.n_samples * 340 / float(self.batch_size)))

    def __getitem__(self, idx):
        """ Returns inputs for one training batch.

        Parameters
        ----------
        idx: int
            Index of the batch.

        Returns
        -------
        SigX: array, shape (batch_size, p)
            Matrix of signature coefficients of the batch samples.

        y: array, shape (batch_size, 340)
            One-hot encoding of the classes array.
        """
        batch_start = idx * self.batch_size
        batch_stop = (idx + 1) * self.batch_size

        # Column 0 of self.indexes is the class/file index, column 1 the row
        # index within that class.
        batch_files = self.indexes[batch_start:batch_stop, 0]
        batch_indexes = self.indexes[batch_start:batch_stop, 1]

        SigX = np.zeros((self.batch_size, self.inputSig.get_sig_dimension()))
        y = np.zeros(self.batch_size, dtype=object)
        for i in range(self.batch_size):
            path_i = self.cache[batch_files[i]]['file'][self.first_row +
                                                        batch_indexes[i]]
            SigX[i, :] = self.inputSig.path_to_sig(path_i)
            y[i] = self.cache[batch_files[i]]['Class'][self.first_row +
                                                       batch_indexes[i]]

        # Rescale so that every signature coefficient lies in [-1, 1].
        SigX = SigX / self.norm_vec
        return (SigX,
                to_categorical(self.word_encoder.transform(y),
                               num_classes=len(self.word_encoder.classes_)))

    def on_epoch_end(self):
        """ Generates a matrix with random indexes of the samples used in
        each epoch.
        """
        file_indexes = np.repeat(np.arange(340), self.n_samples)
        # Note: the same random rows are reused for every class, since
        # np.random.choice is called once and tiled over the 340 classes.
        row_indexes = np.tile(
            np.random.choice(np.arange(self.n_max_samples),
                             size=self.n_samples,
                             replace=False), len(self.word_encoder.classes_))

        self.indexes = np.transpose(np.array([file_indexes, row_indexes]))
        print('Indexes array shape: ', self.indexes.shape)
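
A minimal training sketch under stated assumptions: build_model and the contents of cache are hypothetical placeholders, and the generator is passed directly to Keras Model.fit, which accepts Sequence objects.

# Hypothetical usage sketch; build_model and the cache contents are
# placeholders, not part of the original project.
train_gen = QuickDrawGenerator(n_samples=200 * 340,
                               n_max_samples=1000 * 340,
                               batch_size=1024,
                               first_row=0,
                               cache=cache)  # dict of 340 pandas DataFrames
model = build_model(input_dim=train_gen.inputSig.get_sig_dimension())
model.fit(train_gen, epochs=10)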