Code example #1
    def testNormalization(self):
        print("** Test normalization **")

        the_normalizer = Normalizer("datasets/test_normalization.csv")

        normalized = [[0, 0, 0], [1, 1, 1], [0.5, 0.1, 0.9]]

        self.assertEqual(the_normalizer.normalize(), normalized,
                         "Normalized data doesn't match")
Code example #2
def brain(command):
    response = ""
    command = command
    # from 0  =>> 15 is verb for search and find
    # from 16 =>> 21 is verb for open
    actions = [
        "search", "find", "view", "reach", "detect", "get", "catch", "explore",
        "achieve", "obtain", "pass", "check", "reveal", "expose", "observe",
        "show", "see", "listen", "hear", "open", "watch", "arise", "awaken",
        "call", "consciousness", "get up", "stir", "wake", "wake up"
    ]

    tokens = Tokenizer().tokenize(command)

    # call weather function if there is weather word and country or city name
    citiesORcountries = weatherFunction(command)
    if 'weather' in command.split() and citiesORcountries != []:
        forecast = WeatherC().weatherForecast(citiesORcountries[0])
        return 'the weather in ' + citiesORcountries[0] + ' is ' + forecast + ' today'

    action = None

    fileName = None
    # -----------------------------------<<Variable>>--------------------------------------------
    tagSentence = Tagger().tag(tokens)

    for word, tag in tagSentence:
        if word in actions:
            action = word
        elif tag == 'NN':
            fileName = word

    normlizeAction = Normalizer().snowBallStemmer(action)

    if normlizeAction in actions:
        filePath = FileSearch().search(
            fileName)  # return list of file shared the same name

        if normlizeAction in actions[:15]:
            # for search about folder or file
            OpenMedia().openFile(filePath[0].split("//")[0])
            response = "i hope you're satisfied with our service"
            return response

        if normlizeAction in actions[15:21]:
            # skip playback when a listen/hear/watch request targets a non-media file
            if normlizeAction in ['listen', 'hear', 'watch'] \
                    and filePath[0].split('.')[-1] not in ['mp3', 'mp4', 'mkv']:
                return response
            OpenMedia().openFile(filePath[0])
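A hypothetical call, assuming the project's Tokenizer, Tagger, Normalizer, WeatherC, FileSearch and OpenMedia helpers are importable (the phrasing and file name are only illustrative):

# illustrative usage; "workout" stands in for any file name FileSearch can resolve
print(brain("open workout"))
print(brain("what is the weather in london"))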
Code example #3
def detectCircle(im):
    # detect circles in the image
    n = Normalizer(170)

    im = n.crop(im)
    new = imutils.resize(im, height=170)
    if new.shape[1] > 170:
        new = imutils.resize(im, width=170)

    circles = cv2.HoughCircles(new, cv2.HOUGH_GRADIENT, 1.5, minDist=170, param2=30, minRadius=70, maxRadius=85)

    return circles is not None
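A hypothetical call, assuming a grayscale image on disk (the path is illustrative; cv2, imutils and the project's Normalizer must be available):

image = cv2.imread("samples/eye.png", cv2.IMREAD_GRAYSCALE)  # illustrative path
print("circle detected:", detectCircle(image))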
Code example #4
    def testCSVIntegrity(self):
        print("** Test CSV Integrity **")

        the_normalizer = Normalizer("datasets/test_normalization.csv")

        data = the_normalizer.get_csv()

        origin_data = [['0', '3', '0'], ['1', '33', '100'], ['0.5', '6', '90']]

        length = 3

        self.assertEqual(data, origin_data, "Data and CSV file don't match")
        self.assertEqual(length, the_normalizer.getRowLength(),
                         "Row length doesn't match")
Code example #5
File: kmeans.py Project: kermitsxb/SpamDetector
    def __init__(self, k, n, columns, datafile):
        """Constructeur pour la classe KMeanClusterer"""
        super(KMeanClusterer, self).__init__()

        # Number of clusters wanted
        self.k = k
        self.n = n

        self.is_over = False

        # columns to work with
        self.columns = sorted(columns)

        # Get CSV data
        norm = Normalizer(datafile)
        self.data = norm.normalize()
        self.row_length = norm.getRowLength()
        self.clusters = []
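A hypothetical instantiation matching this constructor's signature (the column indices are illustrative; the data file is the one used by the tests above):

clusterer = KMeanClusterer(k=3, n=50, columns=[0, 1, 2],
                           datafile="datasets/test_normalization.csv")
print(clusterer.row_length, len(clusterer.data))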
Code example #6
    def brn(self):

        tagSentence = Tagger().tag(self.tokens)

        action = None
        fileName = None

        for word, tag in tagSentence:
            if word in self.actions:
                action = word
            elif tag == 'NN':
                fileName = word

        normlizeAction = Normalizer().snowBallStemmer(action)

        if normlizeAction in self.actions:
            filePath = FileSearch().search(fileName)  # return list of file shared the same name

            if normlizeAction in self.actions[:15]:
                # for search about folder or file
                OpenMedia().openFile(filePath[0].split("//")[0])

            if normlizeAction in self.actions[15:21]:
                OpenMedia().openFile(filePath[0])

        else:
            # no recognised action verb in the command
            pass
            # return "can you explain more"

# Brain("i wanna open workout").brn()
Code example #7
    def setUp(self):
        self.datafile = "datasets/spambase_2.data"
        self.normalizer = Normalizer(self.datafile)

Code example #8
    def getDatasetSize(self, datafile):

        norm = Normalizer()
        iris_data_matrix = norm.load_csv(datafile)
        return len(iris_data_matrix)
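A hypothetical test that calls this helper (the data file is taken from the setUp in example #7; the method name is illustrative):

    def testDatasetSize(self):
        size = self.getDatasetSize("datasets/spambase_2.data")
        self.assertTrue(size > 0, "Dataset should not be empty")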
Code example #9
    def __init__(self):
        # Text normalizer
        self.normalizer = Normalizer()
Code example #10
        if i != c:
            cv2.drawContours(new, [cnts[i]], -1, color, thickness=cv2.FILLED)

    # every remaining pixel is white, so the mask contains no useful contour
    if all(all(p == 255 for p in line) for line in new):
        return None

    return new


# Parse arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--imgs_folder", required=True, help="Images folder")
args = vars(ap.parse_args())
imgs_folder = args['imgs_folder']

N = Normalizer(170)

for img in os.listdir(imgs_folder):

    image = cv2.imread("{}/{}".format(imgs_folder, img), 0)

    display("original", image)

    thresh = cv2.threshold(image, 60, 255, cv2.THRESH_BINARY)[1]

    # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 4.x returns only the last two
    _, cnts, h = cv2.findContours(thresh.copy(), cv2.RETR_TREE,
                                  cv2.CHAIN_APPROX_SIMPLE)

    # Hierarchy: For each contour -> [next, previous, child, parent]
    n = h[0][0][2]  # first child
    c = []  # c -> external contours [contour, area, id]
Code example #11
                            encoding="utf-8",
                            object_pairs_hook=collections.OrderedDict)
    fin.close()
    abbrevs = abbrev_json["abbreviation-entries"].keys()

    # word tokenizer
    token_json_filepath = os.path.join(lang_path, "token.json")
    wordtok = WordTokenizer(token_json_filepath,
                            abbrev_json["abbreviation-entries"].keys())

    # normalizer
    norm_json_filepath = os.path.join(lang_path, "norm.json")
    alphaexp_json_filepath = os.path.join(lang_path, "alphaexp.json")
    numexp_rule_filepath = os.path.join(lang_path, "numexp.rule")
    norm = Normalizer(norm_json_filepath, alphaexp_json_filepath,
                      numexp_rule_filepath,
                      abbrev_json["abbreviation-entries"])

    # sentence tokenizer
    sentence_json_filepath = os.path.join(lang_path, "sentence.json")
    senttok = SentenceTokenizer(sentence_json_filepath, raw_text_filepath)

    # ========================
    # run
    # ========================
    utts = []
    for sent in senttok.tokenize_iter():
        tokens, classes, puncs = wordtok.tokenize(sent)

        words = []
        for token, cls, punc in zip(tokens, classes, puncs):
Code example #12
from normalization import Normalizer
import nltk
from nltk import bigrams
#================= Load the dataset and normalize it ===========================
normalizer = Normalizer()


def loading_dataSet():
    file = open("res/dataset.txt", "r")
    data = file.read()
    file.close()
    docs = data.split("\n")
    types = []
    train = []
    for d in docs:
        d = d.split()
        if len(d) != 0:
            types.append(d[0])
    print('dataset Count = ' + str(len(types)))
    normalized_corpus = normalizer.normalize_corpus(docs)
    normalized_corpus.remove('')
    for counter, x in enumerate(normalized_corpus):
        train.append((x, types[counter]))
    return train


normalized_dataset = loading_dataSet()
#===============================================================================
#========================= Start training on the dataset ==========================
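The snippet stops at the training banner. A minimal, hypothetical continuation that feeds the (text, label) pairs into an NLTK Naive Bayes classifier (the project's actual training step may differ, e.g. it may use the imported bigrams):

def extract_features(text):
    # simple bag-of-words features; purely illustrative
    return {word: True for word in text.split()}


train_set = [(extract_features(text), label) for text, label in normalized_dataset]
classifier = nltk.NaiveBayesClassifier.train(train_set)
print(classifier.classify(extract_features("free prize click now")))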
Code example #13
        return json.loads(res)

if __name__ == "__main__":

    # datafile = "kddcup.data_10_percent.csv"
    # fields = [0, 4, 5, 22, 24, 25, 28, 31, 32, 35, 37, 38]
    # header = False
    # fieldClass = 41
    # k = 23
    # n = 20

    datafile = "kddcup.data_1000.csv"
    header = False
    fields = [0, 4, 5, 22, 24, 25, 28, 31, 32, 35, 37, 38]
    fieldClass = 41
    k = 17
    n = 20

    # datafile = "iris.csv"
    # fields = [0, 1, 2, 3]
    # fieldClass = 4
    # header = True
    # k = 3
    # n = 50

    norm = Normalizer(datafile, header)
    res = norm.run(fields, fieldClass)
    classes = norm.classes
    kMeanClusterer = KMeanClusterer(res, classes, k, n)
    print(json.dumps(kMeanClusterer.jsonify(), indent=2, separators=(',', ': ')))