def addUniqueData(self, filename, inputType, normalized=False):
    import os
    if not os.path.isfile(filename):
        raise Exception("addUniqueData: \"" + filename + "\" is not a file")
    if DataCategory.TRAIN <= inputType <= DataCategory.TEST:
        im = Normalize(self.Size(), filename, offset=self.__offset)
        from PIL import ImageOps  # the bare "import ImageOps" form only works with legacy PIL
        array = []
        if self.__grayScale:
            im = ImageOps.grayscale(im)
            if self.__negate:
                im = ImageOps.invert(im)
            if normalized:
                array = [float(x) / 255. for x in im.getdata()]
            else:
                array = list(im.getdata())
        else:
            for p in im.getdata():
                # array.append(zip([float(x)/255. for x in p]))
                for channel in p:  # direct iteration replaces Python 2 xrange indexing
                    if normalized:
                        array.append(float(channel) / 255.)
                    else:
                        array.append(channel)
        self.Inputs()[inputType].append(array)
        # Ground truth is encoded in the file name as "<label>-<anything>.<ext>";
        # string.split() is Python 2 only, so use the str method instead.
        groundTruth = os.path.splitext(os.path.split(filename)[1])[0].split('-')[0]
        self.Targets()[inputType].append(int(groundTruth))
    else:
        raise Exception("Incorrect DataCategory")
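# Hypothetical illustration of the filename convention addUniqueData relies on:
# the ground-truth label is the base name up to the first '-'. The path and
# name "7-sample42.png" are invented examples, not files from the project.
import os

example = "/tmp/digits/7-sample42.png"
label = os.path.splitext(os.path.split(example)[1])[0].split('-')[0]
assert label == "7"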
def GetSpectrograms(pathDataset, genres, duration, offset):
    Ms = []
    for genreDir in os.listdir(pathDataset):
        for audioFile in os.listdir(os.path.join(pathDataset, genreDir)):
            audio, fs = librosa.load(os.path.join(pathDataset, genreDir, audioFile),
                                     duration=duration, offset=offset)
            MelSpectrogram = librosa.feature.melspectrogram(y=audio, sr=fs, power=2.0)
            MelSpectrogramDb = librosa.power_to_db(MelSpectrogram)
            Ms.append(MelSpectrogramDb)
    MelSpectrograms = np.asarray(Ms)
    MelSpectrograms = MelSpectrograms.reshape(MelSpectrograms.shape[0], 1,
                                              MelSpectrograms.shape[1],
                                              MelSpectrograms.shape[2])
    MelSpectrograms = MelSpectrograms.astype('float32')
    MelSpectrogramsNormalized = N.Normalize(MelSpectrograms)
    # One label per spectrogram; integer division keeps np.repeat happy in Python 3.
    labels = np.repeat(genres, len(MelSpectrograms) // len(genres))
    labels = CGTL.ConvertGenresToLabels(labels)
    labels = np_utils.to_categorical(labels, 10)
    return MelSpectrogramsNormalized, labels
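# A minimal sketch of the label/shape bookkeeping above, using synthetic arrays
# instead of librosa output. The 128x130 spectrogram size is an assumption; it
# depends on duration, sample rate, and hop length.
import numpy as np

demo_genres = ['blues', 'classical', 'country']
demo_Ms = [np.zeros((128, 130)) for _ in range(9)]  # 3 clips per genre, in genre order
specs = np.asarray(demo_Ms).reshape(len(demo_Ms), 1, 128, 130).astype('float32')
labels = np.repeat(demo_genres, len(specs) // len(demo_genres))
print(specs.shape)   # (9, 1, 128, 130)
print(labels)        # ['blues' 'blues' 'blues' 'classical' ...]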
def normalizeList(lists):
    result = []
    for line in lists:
        lines = line.split("/*Cate=")
        if len(lines) == 2 and lines[0].find("|") < 0:
            lines[0] = Normalize.normalize(lines[0])
            result.append(lines[0] + "/*Cate=" + lines[1])
        else:
            result.append(line)
    return result
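# Hedged usage sketch: Normalize.normalize is project-specific, so a stand-in
# lower-casing function is used here purely to show which lines qualify for
# rewriting (exactly one "/*Cate=" marker and no "|" in the term part).
def _fake_normalize(s):
    return s.lower()

demo_lines = ["Some Term/*Cate=Noun", "a|b/*Cate=Verb", "no category marker"]
out = []
for line in demo_lines:
    parts = line.split("/*Cate=")
    if len(parts) == 2 and "|" not in parts[0]:
        out.append(_fake_normalize(parts[0]) + "/*Cate=" + parts[1])
    else:
        out.append(line)
print(out)  # only the first entry's term is normalized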
def GetTestData(pathDatasetTest, duration, offset):
    genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
    Ms = []
    for genreDir in os.listdir(pathDatasetTest):
        for audioFile in os.listdir(os.path.join(pathDatasetTest, genreDir)):
            audio, fs = librosa.load(os.path.join(pathDatasetTest, genreDir, audioFile),
                                     duration=duration, offset=offset)
            MelSpectrogram = librosa.feature.melspectrogram(y=audio, sr=fs, power=2.0)
            MelSpectrogramDb = librosa.power_to_db(MelSpectrogram)
            Ms.append(MelSpectrogramDb)
    MelSpectrograms = np.asarray(Ms)
    MelSpectrograms = MelSpectrograms.reshape(MelSpectrograms.shape[0], 1,
                                              MelSpectrograms.shape[1],
                                              MelSpectrograms.shape[2])
    MelSpectrograms = MelSpectrograms.astype('float32')
    MelSpectrogramsNormalized = N.Normalize(MelSpectrograms)
    labels = np.repeat(genres, len(MelSpectrograms) // len(genres))
    return MelSpectrogramsNormalized, labels
def wordByWordISO999en(term, saf=Saf.AUTO_NUMBER_ROMAN, *,
                       pad_digits=DEFAULT_PAD_DIGITS, ignored=None):
    """Word by Word (ISO 999 — English) «ISO by Word»"""
    # Rules
    # 8.1 casefold; strip accents (for English)    [1] [2]
    #     convert punctuation to space             [4]
    #     drop punctuation                         [5]
    #     ignore ignored symbols                   [7]
    # 8.3 arrange only the first word numerically  [6]
    # 8.6 optionally drop specified first words    [3]
    numbers, roman_numerals, spell_numbers = Saf.flags(saf)
    if not numbers:
        pad_digits = 0
    text = Lib.htmlToPlainText(term).casefold()  # [1]
    text = delete_ignored_firsts(text, ignored)  # [3]
    text = Normalize.normalize(text)  # [2]
    chars = []  # [7]
    for c in text:
        if unicodedata.category(c) in LETTERS_OR_DIGITS_OR_PUNCT:
            chars.append(c)
    text = "".join(chars)
    text = PUNCTUATION_TO_IGNORE_RX.sub("", text)  # [5]
    text = PUNCTUATION_AS_SPACE_RX.sub(" ", text)  # [4]
    if numbers:  # [6]
        words = []
        for i, word in enumerate(text.split()):
            if i == 0:  # First word only
                if spell_numbers:
                    number = spelled_number(word, roman_numerals)
                    ok = True
                else:
                    number, ok = padded_number(word, roman_numerals, pad_digits)
                words.append(str(number) if ok else word)
            else:
                words.append(word)
        text = " ".join(words)
    return text
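# A rough sketch of why rule 8.3 pads the first word's number: zero-padding
# makes lexicographic order agree with numeric order. This is not the
# project's padded_number(); it is just the underlying idea with an assumed
# 4-digit width.
def pad_first_word(text, width=4):
    words = text.split()
    if words and words[0].isdigit():
        words[0] = words[0].zfill(width)
    return " ".join(words)

terms = ["10 downing street", "2 bridges road"]
print(sorted(terms))                              # '10 ...' sorts before '2 ...'
print(sorted(pad_first_word(t) for t in terms))   # '0002 ...' before '0010 ...'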
def test_Normalize(self):
    for i, phrase in enumerate(PHRASES):
        self.assertEqual(Normalize.normalize(phrase), EXPECTED[i])
def Separate_Normalize_Batch_CV_Train_Set(Neuron, position, train_indices,
                                          num_folds, i, length, Left_Only=False,
                                          method=None, param_values=None,
                                          normalize=False, filtering=0):
    print('number of folds', num_folds)
    fold_size = len(train_indices) // num_folds
    cv_test_indices = train_indices[i * fold_size:(i + 1) * fold_size]
    if i > 0:
        cv_train_indices1 = train_indices[0:i * fold_size]
        cv_train_indices2 = train_indices[(i + 1) * fold_size:]
        Neuron1 = Neuron[cv_train_indices1, :]
        Neuron2 = Neuron[cv_train_indices2, :]
        test_Neuron = Neuron[cv_test_indices, :]
        if method is not None:
            print('processing method', method)
            if method == 'g':
                test_Neuron = fra.gaussianConvolution(test_Neuron, param_values)
                Neuron1 = fra.gaussianConvolution(Neuron1, param_values)
                Neuron2 = fra.gaussianConvolution(Neuron2, param_values)
            if method == 'w':
                test_Neuron = fra.windowMethod(test_Neuron, param_values)
                Neuron1 = fra.windowMethod(Neuron1, param_values)
                Neuron2 = fra.windowMethod(Neuron2, param_values)
            if method == 'a':
                test_Neuron = fra.alpha_function(test_Neuron, param_values)
                Neuron1 = fra.alpha_function(Neuron1, param_values)
                Neuron2 = fra.alpha_function(Neuron2, param_values)
            if method == 'trace':
                test_Neuron = fra.ApproxTrace(test_Neuron, param_values)
                Neuron1 = fra.ApproxTrace(Neuron1, param_values)
                Neuron2 = fra.ApproxTrace(Neuron2, param_values)
        train_Neuron = np.vstack((Neuron1, Neuron2))
        # Statistics come from the training folds only, then are applied everywhere.
        avg, std = Normalize.CalcAvgStd(train_Neuron)
        test_Neuron = Normalize.Normalize(test_Neuron, avg, std)
        Neuron1 = Normalize.Normalize(Neuron1, avg, std)
        Neuron2 = Normalize.Normalize(Neuron2, avg, std)
        # Comment out the next block to remove position normalization
        if normalize:
            avg_pos = np.mean(
                np.vstack((position[cv_train_indices1, :],
                           position[cv_train_indices2])), 0)
            position1 = Normalize.Demean(position[cv_train_indices1, :], avg_pos)
            position2 = Normalize.Demean(position[cv_train_indices2, :], avg_pos)
            test_position = Normalize.Demean(position[cv_test_indices, :], avg_pos)
        else:
            position1 = position[cv_train_indices1, :]
            position2 = position[cv_train_indices2, :]
            test_position = position[cv_test_indices, :]
        Neuron1, position1 = batches(Neuron1, position1, length, Left_Only)
        Neuron2, position2 = batches(Neuron2, position2, length, Left_Only)
        test_Neuron, test_position = batches(test_Neuron, test_position, length, Left_Only)
        train_Neuron = Neuron1 + Neuron2
        train_position = position1 + position2
    else:
        cv_train_indices = train_indices[fold_size:]
        train_Neuron = Neuron[cv_train_indices, :]
        train_position = position[cv_train_indices, :]
        test_position = position[cv_test_indices, :]
        test_Neuron = Neuron[cv_test_indices, :]
        if method is not None:
            print('processing method', method)
            if method == 'g':
                test_Neuron = fra.gaussianConvolution(test_Neuron, param_values)
                train_Neuron = fra.gaussianConvolution(train_Neuron, param_values)
            if method == 'w':
                test_Neuron = fra.windowMethod(test_Neuron, param_values)
                train_Neuron = fra.windowMethod(train_Neuron, param_values)
            if method == 'a':
                test_Neuron = fra.alpha_function(test_Neuron, param_values)
                train_Neuron = fra.alpha_function(train_Neuron, param_values)
            if method == 'trace':
                test_Neuron = fra.ApproxTrace(test_Neuron, param_values)
                train_Neuron = fra.ApproxTrace(train_Neuron, param_values)
        avg, std = Normalize.CalcAvgStd(train_Neuron)
        train_Neuron = Normalize.Normalize(train_Neuron, avg, std)
        test_Neuron = Normalize.Normalize(test_Neuron, avg, std)
        if normalize:
            avg_pos = np.mean(position[cv_train_indices, :], 0)
            train_position = Normalize.Demean(position[cv_train_indices, :], avg_pos)
            test_position = Normalize.Demean(position[cv_test_indices, :], avg_pos)
        train_Neuron, train_position = batches(train_Neuron, train_position, length, Left_Only)
        test_Neuron, test_position = batches(test_Neuron, test_position, length, Left_Only)
    test_position, train_position = np.asarray(test_position), np.asarray(train_position)
    test_Neuron, train_Neuron = np.asarray(test_Neuron), np.asarray(train_Neuron)
    if filtering > 0:
        filtered = velocityfiltering.velocity_filter(train_position[:, 0],
                                                     train_position[:, 1], filtering)
        train_Neuron = train_Neuron[filtered, :, :]
        train_position = train_position[filtered, :]
        filtered = velocityfiltering.velocity_filter(test_position[:, 0],
                                                     test_position[:, 1], filtering)
        test_Neuron = test_Neuron[filtered, :, :]
        test_position = test_position[filtered, :]
    return train_Neuron, train_position, test_Neuron, test_position
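# Self-contained sketch of the contiguous fold arithmetic used above
# (synthetic indices; no neural data needed).
import numpy as np

train_indices = np.arange(100)
num_folds, i = 5, 2
fold_size = len(train_indices) // num_folds
cv_test = train_indices[i * fold_size:(i + 1) * fold_size]
cv_train = np.concatenate((train_indices[:i * fold_size],
                           train_indices[(i + 1) * fold_size:]))
print(cv_test[0], cv_test[-1])   # 40 59
print(len(cv_train))             # 80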
def Separate_Normalize_Batch_Test_Train_Set(Neuron, position, train_indices,
                                            test_indices, length, Left_Only=False,
                                            method=None, param_values=None,
                                            normalize=False, avg=None, std=None,
                                            pos_avg=None, zero=None, filtering=0):
    #avg_pos=np.mean(position[train_indices,:],0)
    #train_position=Normalize.Demean(position[train_indices,:],avg_pos)
    print(position.shape)
    train_position = position[train_indices, :]
    train_Neuron = Neuron[train_indices, :]
    test_Neuron = Neuron[test_indices, :]
    test_position = position[test_indices, :]
    print(test_position.shape)
    if method is not None:
        print('processing method', method)
        if method == 'g':
            test_Neuron = fra.gaussianConvolution(test_Neuron, param_values)
            train_Neuron = fra.gaussianConvolution(train_Neuron, param_values)
        if method == 'w':
            test_Neuron = fra.windowMethod(test_Neuron, param_values)
            train_Neuron = fra.windowMethod(train_Neuron, param_values)
        if method == 'a':
            test_Neuron = fra.alpha_function(test_Neuron, param_values)
            train_Neuron = fra.alpha_function(train_Neuron, param_values)
        if method == 'trace':
            test_Neuron = fra.ApproxTrace(test_Neuron, param_values)
            train_Neuron = fra.ApproxTrace(train_Neuron, param_values)
    if avg is None:
        avg, std = Normalize.CalcAvgStd(Neuron[train_indices, :])
    train_Neuron = Normalize.Normalize(train_Neuron, avg, std)
    test_Neuron = Normalize.Normalize(test_Neuron, avg, std)
    if normalize:
        avg_pos = pos_avg  # the caller may supply precomputed training statistics
        if avg_pos is None:
            avg_pos = np.mean(position[train_indices, :], 0)
        train_position = Normalize.Demean(position[train_indices, :], avg_pos)
        test_position = Normalize.Demean(position[test_indices, :], avg_pos)
    else:
        avg_pos = None
    if zero is not None:
        test_Neuron[:, zero] = 0
    train_Neuron, train_position = batches(train_Neuron, train_position, length, Left_Only)
    test_Neuron, test_position = batches(test_Neuron, test_position, length, Left_Only)
    test_Neuron, train_Neuron = np.asarray(test_Neuron), np.asarray(train_Neuron)
    test_position, train_position = np.asarray(test_position), np.asarray(train_position)
    print(train_position.shape)
    if filtering > 0:
        if train_position.shape[0] > 0:
            filtered = velocityfiltering.velocity_filter(train_position[:, 0],
                                                         train_position[:, 1], filtering)
            train_Neuron = train_Neuron[filtered, :, :]
            train_position = train_position[filtered, :]
        filtered = velocityfiltering.velocity_filter(test_position[:, 0],
                                                     test_position[:, 1], filtering)
        test_Neuron = test_Neuron[filtered, :, :]
        test_position = test_position[filtered, :]
    return train_Neuron, train_position, test_Neuron, test_position, avg, std, avg_pos
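# Sketch of the normalization contract above: statistics are estimated on the
# training rows only and then applied to both splits, so the test set never
# influences the scaling. (That Normalize.CalcAvgStd/Normalize implement a
# z-score is an assumption based on the names.)
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(5.0, 2.0, size=(200, 8))
train, test = X[:150], X[150:]
avg, std = train.mean(axis=0), train.std(axis=0)
train_z = (train - avg) / std
test_z = (test - avg) / std   # reuses training statistics
print(train_z.mean(axis=0).round(2))  # ~0 by construction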
def letterByLetterCMS16(term, saf=Saf.AUTO, *, pad_digits=DEFAULT_PAD_DIGITS,
                        ignored=None):
    """Letter by Letter (Chicago Manual of Style) «CMS by Letter»

    Converts a term to a "sort as" text that when sorted "naturally"
    will give the letter by letter ordering specified in Chicago 16th
    edition 16.60. The capital letter that may appear between the first
    and second word is used to force the correct order of precedence.
    """
    # ignored should be None or an empty set for main entries, and a
    # set of articles, conjunctions, and prepositions to ignore for
    # subentries.
    #
    # Rules (all words):
    #   casefold                                    [1]
    #   strip accents                               [2]
    #   aword-bword -> awordbword                   [3]
    #   delete non-letters and non-digits           [4]
    #   O'Name -> oname                             [9]
    # Precedence (first word only):
    #   word -> word                                [5]
    #   word (.* -> word D .*                       [6]
    #   word, .* -> word H .*                       [7]
    #   word\s*[,;:.]*\d+.* -> word P \d+.*         [8]
    # Subentries: remove leading articles,          [10]
    #   conjunctions, and prepositions.
    # Extensions:
    #   word's -> words                             [X1]
    numbers, roman_numerals, spell_numbers = Saf.flags(saf)
    if not numbers:
        pad_digits = 0
    # Phase #1
    term = Lib.htmlToPlainText(term)
    term = delete_ignored_firsts(term, ignored)  # [10]
    parts = []
    precedence = None
    for i, word in enumerate(term.split()):
        word = word.casefold()  # [1]
        word, hyphens = WORD_SEP_RX.subn("", word)  # [3]
        if not word:
            continue
        word = Normalize.unaccented(word)  # [2]
        # Must follow casefold & unaccented
        word = O_APOSTROPHE_RX.sub("o", word)  # [9]
        word = APOSTROPHE_S_RX.sub("s", word)  # [X1]
        theword = word.split()[0].rstrip()
        if i == 0:  # First word
            if theword.endswith(","):
                precedence = "H"  # [7]
            elif hyphens and word and word[-1].isdigit():
                precedence = "P"  # [8]a
                match = FINAL_DIGITS_RX.search(word)
                if match is not None:
                    pos = match.start(1)
                    word = word[:pos] + " " + word[pos:]
        elif i == 1 and precedence is None:  # Second word
            if theword.startswith("("):
                precedence = "D"  # [6]
            elif theword:
                if theword[0].isdigit():
                    precedence = "P"  # [8]b
        word = NOT_LETTERS_OR_DIGITS_RX.sub(" ", word)  # [4]
        j = i
        for word in word.split():  # [5]
            if i == 0 and spell_numbers:  # First word only
                word = spelled_number(word, roman_numerals).replace(" ", "")
            elif pad_digits:
                word, done = padded_number(word, roman_numerals, pad_digits)
                if (done and j == 1
                        and (precedence is None or precedence in "LT")):
                    precedence = "P"  # [8]c
            parts.append(word)
            j += 1
    if not parts:
        return ""
    # Phase #2
    words = [parts[0]]
    if precedence is not None:
        words.append(precedence)
    words += parts[1:]
    return "".join(words)
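# Hedged illustration of rules [3], [9], and [X1] with stand-in regexes; the
# project's WORD_SEP_RX, O_APOSTROPHE_RX, and APOSTROPHE_S_RX are not shown in
# this section, so these patterns are assumptions.
import re

word = "o'neill-smith's"
word = re.sub(r"-", "", word)        # [3]  aword-bword -> awordbword
word = re.sub(r"^o'", "o", word)     # [9]  O'Name -> oname
word = re.sub(r"'s\b", "s", word)    # [X1] word's -> words
print(word)  # oneillsmiths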
                                 offset=offsetEv)
scoreEvaluation2 = model.evaluate(td, tl, verbose=1)
print('The second evaluation score is: ' + str(scoreEvaluation2[1]))
if (input('Do you want to select a file and try the automatic equalizer? (Yes/No) \n')
        == 'Yes'):
    while tryAgain == 'Yes':
        Tk().withdraw()
        filename = askopenfilename(title='Select an audio file')
        audio, fs = librosa.load(filename, duration=duration)
        wholeAudio, fs = librosa.load(filename)
        MelSpectrogram = librosa.feature.melspectrogram(y=audio, sr=fs, power=2.0)
        MelSpectrogramDb = librosa.power_to_db(MelSpectrogram)
        MelSpectrogramDb = MelSpectrogramDb.reshape(
            (1, 1, MelSpectrogram.shape[0], MelSpectrogram.shape[1]))
        MelSpectrogramDb = N.Normalize(MelSpectrogramDb)
        # predict_classes returns an array, so take its first element before
        # using it as a list index.
        predGenre = genres[model.predict_classes(MelSpectrogramDb)[0]]
        print('The song belongs in the category ' + str(predGenre))
        print('This is the mel-spectrogram of the original audio')
        MS = librosa.feature.melspectrogram(y=wholeAudio, sr=fs, power=2.0)
        MSDb = librosa.power_to_db(MS)
        librosa.display.specshow(MSDb, x_axis='time', y_axis='mel', sr=fs,
                                 cmap='inferno')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel-frequency spectrogram before equalization')
        plt.tight_layout()
        plt.show()
        t = np.arange(0.0, len(wholeAudio) / fs, 1 / fs)
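# Note: Sequential.predict_classes was removed in newer TensorFlow/Keras
# releases. On those versions the equivalent prediction would be:
#   predGenre = genres[int(np.argmax(model.predict(MelSpectrogramDb), axis=-1)[0])]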
def wordByWordNISO3(term, saf=Saf.AUTO_NUMBER_ROMAN, *,
                    pad_digits=DEFAULT_PAD_DIGITS, ignored=None):
    """Word by Word (NISO Technical Report 3) «NISO by Word»

    Basic order: spaces, symbols, digits, letters.
    """
    # The NISO 3 comprehensive example does not use ASCII order for
    # symbols; also it does not follow strict numerical order, e.g.,
    # putting 007 before 1 2 3. This rule implementation follows the
    # NISO 3 rules correctly even when this differs from the example.
    #
    # Rules:
    #   3.1 Normalize spaces                                      [1]
    #   3.2 Punctuation → space                                   [2]
    #   3.3 Punctuation ignored                                   [3a] [3b]
    #   3.4 Symbols (two+ treated as first)                       [4]
    #   3.5 Numbers before letters (works as-is) &
    #       don't spell numbers                                   [8]
    #   3.6 Upper- and lower-case equal                           [6]
    #   3.6.1 Expand ligatures, combined letters, strip accents   [7]
    #   3.7 Super- and sub-scripts as normal chars                [6]
    #   4.5 & 4.6 Leading articles shouldn't normally be ignored
    #       but can be if the user requires                       [11]
    #   6 Numbers                                                 [9]
    #   6.3 Preserve decimal point if needed                      [10]
    #   7.1 Symbols are in ASCII order
    numbers, roman_numerals, _ = Saf.flags(saf)  # Ignore spell_numbers
    if not numbers:
        pad_digits = 0
    text = Lib.htmlToPlainText(term).casefold()  # [6]
    text = delete_ignored_firsts(text, ignored)  # [11]
    text = Normalize.normalize(text)  # [7]
    chars = []  # [4]
    inSymbols = False
    for c in text:
        if unicodedata.category(c) not in LETTERS_OR_DIGITS_OR_PUNCT:
            if inSymbols:
                continue
            inSymbols = True
        else:
            inSymbols = False
        chars.append(c)
    text = "".join(chars)
    text = PUNCTUATION_TO_IGNORE1_RX.sub("", text)  # [3a]
    text = PUNCTUATION_AS_SPACE_RX.sub(" ", text)  # [2]
    if numbers:  # [9]
        text = DIGITS_RX.sub(r"0\1", text).replace(",", "")
        words = []
        for word in text.split():
            parts = word.split(".")
            if len(parts) == 2:
                dec, ok = padded_number(parts[0], roman_numerals, pad_digits)
                if not ok:
                    dec = "0"
                frac, ok = padded_number(parts[1], roman_numerals, pad_digits)
                number = "{}!{}".format(dec, frac)
            else:
                number, ok = padded_number(word, roman_numerals, pad_digits)
                if ok:
                    number += "!" + ("0" * pad_digits)
            words.append(str(number) if ok else word)
        text = " ".join(words)
    text = PUNCTUATION_TO_IGNORE2_RX.sub("", text)  # [3b]
    if numbers:
        text = text.replace("!", ".")  # [10]
        text = FIX_CHAR_AFTER_NUMBER_RX.sub(r"\g<dec>.\g<frac>\g<nondec>", text)
    text = " ".join(text.split())  # [1]
    chars = []
    for c in text:
        if (c not in " ." and
                unicodedata.category(c) not in LETTERS_OR_DIGITS):
            chars.append("!")
        chars.append(c)
    return "".join(chars)
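# Sketch of the rule 6.3 placeholder trick above: "!" stands in for the
# decimal point while punctuation is being stripped, then is restored.
# The 4-character widths are an assumed pad_digits, and the project's
# padded_number may pad the fraction part differently.
def pad_decimal(word, width=4):
    dec, frac = word.split(".")
    return dec.zfill(width) + "!" + frac.ljust(width, "0")

key = pad_decimal("3.14")
print(key)                     # 0003!1400
print(key.replace("!", "."))   # 0003.1400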
import numpy as np
import Normalize
from matplotlib import pyplot as pl

num = 5
tlen = 5000

# exponentially skewed AR(1)
data = np.random.normal(size=(num, tlen + 100))
for t in range(1, tlen + 100):  # range, not the Python 2 xrange
    data[:, t] += data[:, t - 1] * 0.995 + 0.1
data = data[:, 100:]
data = np.exp(data)

# transformation
trans = Normalize.Quantile(data)

# visual comparison
f, ax = pl.subplots(2, 1, figsize=(8, 6), sharex=True)
ax[0].semilogy(data[:, -1000:].T)
ax[1].plot(trans[:, -1000:].T)
ax[0].text(0.95, 0.8, 'raw input', horizontalalignment='right',
           transform=ax[0].transAxes,
           bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 15})
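# Normalize.Quantile is project-specific; a plausible rank-based equivalent
# (mapping each row's values to their empirical quantiles) looks like this:
import numpy as np

def quantile_transform_rows(data):
    ranks = data.argsort(axis=1).argsort(axis=1)
    return (ranks + 0.5) / data.shape[1]  # uniform on (0, 1)

demo = np.exp(np.random.normal(size=(5, 1000)))
flat = quantile_transform_rows(demo)
print(flat.min(), flat.max())  # ~0.0005 and ~0.9995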
results_original = []
results_processed = []
for iteration in range(10):
    print('ITERATION = ', iteration + 1)
    training_data, training_labels, test_data, test_labels = pf.randomSample(
        DATASET, LABELS, 0.8)
    training_data_bk = training_data.copy()
    test_data_bk = test_data.copy()
    training_labels_bk = training_labels.copy()
    test_labels_bk = test_labels.copy()
    #norm_obj = norm.Normalize(training_data, test_data)
    norm_obj = norm.Normalize(training_data, 'train')
    norm_training_data = norm_obj.getTrainData()
    #print(norm_training_data)
    norm_test_data = norm_obj.getTestData(test_data)
    #print(norm_test_data)
    #prep = pf.CobbDouglas(training_data, training_labels)
    #ELASTICITIES, CONSTANT = prep.findRegressionCoefficients()
    prep = pf.CobbDouglas(norm_training_data, training_labels)
    #print('Printing labels: ', training_labels)
    ELASTICITIES, CONSTANT = prep.findRegressionCoefficients()
    PROCESSED_TRAINING_DATA = pf.elasticExponentiation(norm_training_data,
                                                       ELASTICITIES, CONSTANT)
# GET DATA
#data = get_data(settings["ticker_file"], settings["markets_target"])
data_path = os.path.join(os.getcwd(), "app", "src", "data", "BTC_NEO_100.csv")
data = pd.read_csv(data_path, delimiter=",")
data = data.drop('Unnamed: 0', axis=1)

# Display
fig = plt.figure(figsize=(21, 7))
# read_csv returns a flat DataFrame, so index the column directly (the old
# data[0]["data"]["bid"] indexing matched the commented-out get_data loader)
data["bid"].plot(label="bid", title="Bid curve")
plt.grid()
plt.legend()
plt.show()

# NORMALIZATION OF THE DATA
norm = no.Normalisation(data)
all_normalize_data = norm.fit(settings["normalisation_fit_type"],
                              settings["labelisation_features_name"],
                              settings["normalisation_fit_target"])
normalize_data = norm.get_normalize_data(0)

# PEAKS DETECTION
peaksmax, peaksmin = st.peakdet(normalize_data[settings["normalisation_target"]],
                                settings["labelisation_disparity"])

# Display peaks
fig = plt.figure(figsize=(21, 7))
plt.plot(peaksmax[:, 0], peaksmax[:, 1], 'ro', label="Max peaks")
plt.plot(peaksmin[:, 0], peaksmin[:, 1], 'go', label="Minimum peaks")
plt.plot(normalize_data["bid"], label="Bid")
plt.grid()
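# st.peakdet is project-specific; if it is unavailable, scipy offers a similar
# local-extrema search. Treating prominence as the counterpart of
# labelisation_disparity is an assumption.
from scipy.signal import find_peaks
import numpy as np

signal = np.sin(np.linspace(0, 20, 500)) + 0.1 * np.random.randn(500)
max_idx, _ = find_peaks(signal, prominence=0.5)
min_idx, _ = find_peaks(-signal, prominence=0.5)
print(len(max_idx), len(min_idx))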
def _computeSortValue(self):

    def numberForText(text):
        if not text:
            return 0
        text = text.upper()
        if len(text) == 1:
            return 100 * ord(text[0])
        return (100 * ord(text[0])) + ord(text[1])

    texts = []
    for child in self:
        text = Normalize.normalize(child.text.casefold())
        i = text.rfind("@")
        if i > -1:
            text = text[i + 1:]
        texts.append(PARENTHESES_RX.sub(" ", text))
    values = []
    for text in NOT_LETTER_OR_DIGIT_RX.split(" ".join(texts)):
        if text:
            value = Util.valueOf(text, addOffset=True)
            if value is not None:
                values.append(value)
            elif Util.isdigit(text[0]):
                index = 1
                while index < len(text) and Util.isdigit(text[index]):
                    index += 1
                values.append(int(text[:index]) + Util.INTEGER_OFFSET)
                rest = text[index:]
                if rest:
                    try:
                        values.append(roman.fromRoman(rest.upper()) +
                                      Util.ROMAN_OFFSET)
                    except roman.RomanError:
                        values.append(numberForText(rest))
            elif text[0] in "cdilmvx":
                match = ROMAN_INSIDE_RX.search(text)
                if match is not None:
                    before = match.group(1)
                    if before:
                        values.append(numberForText(before))
                    try:
                        digits = match.group(2)
                        values.append(roman.fromRoman(digits.upper()) +
                                      Util.ROMAN_OFFSET)
                    except roman.RomanError:
                        values.append(numberForText(digits))
                    after = match.group(3)
                    if after:
                        values.append(numberForText(after))
                else:
                    values.append(numberForText(text))
            else:
                values.append(numberForText(text))
    values += [0] * (Util.SORT_VALUE_COMPONENT_COUNT - len(values))
    self._sort_value = " ".join("{:010}".format(value) for value in values)
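# Why the sort value uses "{:010}" above: zero-padding to a fixed width makes
# plain string comparison agree with numeric comparison (self-contained demo).
vals = [42, 7, 1000]
keys = ["{:010}".format(v) for v in vals]
print(sorted(keys))  # ['0000000007', '0000000042', '0000001000']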