def testNormalization(self):
    """Check that Normalizer.normalize() scales the test CSV into [0, 1]."""
    print("** Test normalization **")
    the_normalizer = Normalizer("datasets/test_normalization.csv")
    expected = [[0, 0, 0], [1, 1, 1], [0.5, 0.1, 0.9]]
    # assertEqual reports an element-wise diff on failure, unlike
    # assertTrue(a == b) which only says the comparison was False.
    self.assertEqual(the_normalizer.normalize(), expected,
                     "Normalized data doesn't match")
def brain(command):
    """Interpret a natural-language command and act on it.

    Recognizes two kinds of requests:
    - weather questions ("weather" plus a city/country name) -> returns a
      forecast sentence immediately;
    - file commands (a known verb plus a noun taken as a file name) ->
      searches for the file and opens it or its folder.

    Returns a short response string ("" when nothing was recognized).
    """
    response = ""
    # Verb vocabulary. Per the original comments, the first slice is used
    # for "search/find" verbs and the next slice for "open/play" verbs --
    # TODO confirm the intended slice boundaries (original said 0..15 and
    # 16..21 but sliced [:15] and [15:21]).
    actions = [
        "search", "find", "view", "reach", "detect", "get", "catch",
        "explore", "achieve", "obtain", "pass", "check", "reveal", "expose",
        "observe", "show", "see", "listen", "hear", "open", "watch", "arise",
        "awaken", "call", "consciousness", "get up", "stir", "wake", "wake up"
    ]
    tokens = Tokenizer().tokenize(command)

    # Weather shortcut: answer directly when the command mentions the
    # weather together with a recognized city or country name.
    citiesORcountries = weatherFunction(command)
    if 'weather' in command.split() and citiesORcountries != []:
        return 'the weather in ' + citiesORcountries[0] + ' is ' + WeatherC(
        ).weatherForecast(citiesORcountries[0]) + ' today'

    # Pick out the action verb and the target file name from the tagged
    # sentence; the last match of each wins, as in the original.
    action = None
    fileName = None
    tagSentence = Tagger().tag(tokens)
    for word, tag in tagSentence:
        if word in actions:
            action = word
        elif tag == 'NN':
            fileName = word

    # Bug fix: the original passed `action` to the stemmer even when no verb
    # was recognized, stemming None. Bail out early instead.
    if action is None:
        return response

    normlizeAction = Normalizer().snowBallStemmer(action)
    if normlizeAction in actions:
        # List of files sharing the requested name.
        filePath = FileSearch().search(fileName)
        if normlizeAction in actions[:15]:  # search for a folder or file
            OpenMedia().openFile(filePath[0].split("//")[0])
            response = "i hope you're satisfied with our service"
            return response
        if normlizeAction in actions[15:21]:  # open / play the file itself
            # Bug fix: the original compared a string extension to a list
            # (`ext != ['mp3', 'mp4', 'mkv']` is always True); membership
            # test is what was meant. The branch still only `pass`es, as
            # before -- TODO decide what to do for non-media extensions.
            if normlizeAction in ['listen', 'hear', 'watch'] and \
                    filePath[0].split('.')[1] not in ['mp3', 'mp4', 'mkv']:
                pass
            OpenMedia().openFile(filePath[0])
    return response
def detectCircle(im):
    """Return True when a Hough circle is found in the (cropped) image.

    The image is cropped via Normalizer(170) and resized to a 170px working
    size before running cv2.HoughCircles. minDist=170 allows at most one
    circle in the frame; radius is constrained to [70, 85].
    """
    n = Normalizer(170)
    im = n.crop(im)
    new = imutils.resize(im, height=170)
    if new.shape[1] > 170:
        # Too wide after the height-based resize: resize the crop by width
        # instead so the working image fits in 170px.
        new = imutils.resize(im, width=170)
    circles = cv2.HoughCircles(new, cv2.HOUGH_GRADIENT, 1.5, minDist=170,
                               param2=30, minRadius=70, maxRadius=85)
    # Idiom fix: `x is not None` instead of `not x is None`.
    return circles is not None
def testCSVIntegrity(self):
    """Check that the raw CSV is read back verbatim and the row length is 3."""
    print("** Test CSV Integrity **")
    the_normalizer = Normalizer("datasets/test_normalization.csv")
    data = the_normalizer.get_csv()
    origin_data = [['0', '3', '0'], ['1', '33', '100'], ['0.5', '6', '90']]
    length = 3
    # assertEqual gives readable diffs on failure, unlike assertTrue(a == b).
    self.assertEqual(data, origin_data, "Data and CSV file doesn't match")
    self.assertEqual(length, the_normalizer.getRowLength(),
                     "Line length doesn't match")
def __init__(self, k, n, columns, datafile):
    """Constructor for the KMeanClusterer class.

    Loads and normalizes `datafile`, records the column subset to cluster
    on, and starts with an empty cluster list.
    """
    super(KMeanClusterer, self).__init__()
    self.k = k                      # number of clusters wanted
    self.n = n                      # iteration/run count -- confirm semantics
    self.is_over = False
    self.columns = sorted(columns)  # columns to work with, ascending order
    # Pull normalized CSV data and the row length from the Normalizer.
    normalizer = Normalizer(datafile)
    self.data = normalizer.normalize()
    self.row_length = normalizer.getRowLength()
    self.clusters = []
def brn(self):
    """Interpret the tokenized command and open the matching file.

    Scans the POS-tagged tokens for a known action verb and a noun (taken
    as the file name), then either opens the file's folder (search verbs,
    actions[:15]) or the file itself (open verbs, actions[15:21]).
    """
    tagSentence = Tagger().tag(self.tokens)
    # Bug fix: the original left `action`/`fileName` unbound when the
    # sentence contained no known verb or no noun, raising NameError below.
    action = None
    fileName = None
    for word, tag in tagSentence:
        if word in self.actions:
            action = word
        elif tag == 'NN':
            fileName = word
    if action is None:
        return  # no recognizable verb -- nothing to do
    normlizeAction = Normalizer().snowBallStemmer(action)
    if normlizeAction in self.actions:
        # List of files sharing the requested name.
        filePath = FileSearch().search(fileName)
        if normlizeAction in self.actions[:15]:
            # Search verbs: open the containing folder.
            OpenMedia().openFile(filePath[0].split("//")[0])
        if normlizeAction in self.actions[15:21]:
            # Open verbs: open the file itself.
            OpenMedia().openFile(filePath[0])
def setUp(self):
    """Create a fresh Normalizer over the spambase dataset for each test."""
    self.datafile = "datasets/spambase_2.data"
    self.normalizer = Normalizer(self.datafile)
def getDatasetSize(self, datafile):
    """Return the number of rows in `datafile` as loaded by the Normalizer."""
    data_matrix = Normalizer().load_csv(datafile)
    return len(data_matrix)
def __init__(self):
    """Set up the text normalizer this instance delegates to."""
    self.normalizer = Normalizer()
if i != c: cv2.drawContours(new, [cnts[i]], -1, color, thickness=cv2.FILLED) if all(all(p == 255 for p in line) == True for line in new): return None return new # Parse arguments ap = argparse.ArgumentParser() ap.add_argument("-i", "--imgs_folder", required=True, help="Images folder") args = vars(ap.parse_args()) imgs_folder = args['imgs_folder'] N = Normalizer(170) for img in os.listdir(imgs_folder): image = cv2.imread("{}/{}".format(imgs_folder, img), 0) display("original", image) thresh = cv2.threshold(image, 60, 255, cv2.THRESH_BINARY)[1] _, cnts, h = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # Hierarchy: For each contour -> [next, previous, child, parent] n = h[0][0][2] # first child c = [] # c -> external contours [contour, area, id]
encoding="utf-8", object_pairs_hook=collections.OrderedDict) fin.close() abbrevs = abbrev_json["abbreviation-entries"].keys() # word tokenizer token_json_filepath = os.path.join(lang_path, "token.json") wordtok = WordTokenizer(token_json_filepath, abbrev_json["abbreviation-entries"].keys()) # normalizer norm_json_filepath = os.path.join(lang_path, "norm.json") alphaexp_json_filepath = os.path.join(lang_path, "alphaexp.json") numexp_rule_filepath = os.path.join(lang_path, "numexp.rule") norm = Normalizer(norm_json_filepath, alphaexp_json_filepath, numexp_rule_filepath, abbrev_json["abbreviation-entries"]) # sentence tokenizer sentence_json_filepath = os.path.join(lang_path, "sentence.json") senttok = SentenceTokenizer(sentence_json_filepath, raw_text_filepath) # ======================== # run # ======================== utts = [] for sent in senttok.tokenize_iter(): tokens, classes, puncs = wordtok.tokenize(sent) words = [] for token, cls, punc in zip(tokens, classes, puncs):
from normalization import Normalizer
import nltk
from nltk import bigrams

#================= Loading dataset and normalize it ===========================
# NOTE(review): this rebinds the imported class name to an instance, shadowing
# the `Normalizer` class for the rest of the module. Kept as-is for backward
# compatibility with code that references this module-level name.
Normalizer = Normalizer()


def loading_dataSet():
    """Read res/dataset.txt and return [(normalized_doc, label), ...].

    Each non-empty line's first whitespace-separated token is its label;
    the whole line is normalized via Normalizer.normalize_corpus.
    """
    # Bug fix: use a context manager so the file is closed even if
    # read() raises; the original only closed on the happy path.
    with open("res/dataset.txt", "r") as file:
        data = file.read()
    docs = data.split("\n")
    types = []
    train = []
    for d in docs:
        d = d.split()
        if len(d) != 0:
            types.append(d[0])  # first token of the line is the class label
    print('dataset Count = ' + str(len(types)))
    normalized_corpus = Normalizer.normalize_corpus(docs)
    normalized_corpus.remove('')
    # Pair each normalized document with its label, preserving order
    # (enumerate replaces the manual counter of the original).
    for counter, x in enumerate(normalized_corpus):
        train.append((x, types[counter]))
    return train


normalized_dataset = loading_dataSet()
#===============================================================================
#========================= Starting Trainning dataset ==========================
return json.loads(res) if __name__ == "__main__": # datafile = "kddcup.data_10_percent.csv" # fields = [0, 4, 5, 22, 24, 25, 28, 31, 32, 35, 37, 38] # header = False # fieldClass = 41 # k = 23 # n = 20 datafile = "kddcup.data_1000.csv" header = False fields = [0, 4, 5, 22, 24, 25, 28, 31, 32, 35, 37, 38] fieldClass = 41 k = 17 n = 20 # datafile = "iris.csv" # fields = [0, 1, 2, 3] # fieldClass = 4 # header = True # k = 3 # n = 50 norm = Normalizer(datafile, header) res = norm.run(fields, fieldClass) classes = norm.classes kMeanClusterer = KMeanClusterer(res, classes, k, n) print json.dumps(kMeanClusterer.jsonify(), indent=2, separators=(',', ': '))