Example #1
def listen():
    RESULT = " "
    print("EİS: Anahtar kelime bekleniyor.")

    with MIC as source:  # listen on the microphone for 2 s
        AUDIO = R.listen(source, phrase_time_limit=2)

    try:  # look for the keyword
        KEYWORD = R.recognize_google(AUDIO, language='tr')  # speech recognition
        KEYWORD_STR = str(KEYWORD)
    except sr.UnknownValueError:  # fall back if the audio was not understood
        KEYWORD_STR = " "

    if KEYWORD_STR.lower() == "merhaba":  # keyword matched, so listen for a command
        print("EİS: Komut için dinlemede.")

        with MIC as source:  # listen on the microphone for 3 s
            AUDIO = R.listen(source, phrase_time_limit=3)

        try:  # main recognition
            RESULT = R.recognize_google(AUDIO, language='tr')  # speech recognition
            print("Kullanıcı: " + str(RESULT))
        except sr.UnknownValueError:  # fall back if the audio was not understood
            print('EİS: Anlaşılmadı.')

    if RESULT == "kapan":
        sys.exit("Kapanıyor...")

    normalization.normalize(RESULT)  # the "understand and act" step
def bm_25(doc_len_arr, inverted_index, words, l_avg):
    print "EXECUTING BM25"
    result_list = defaultdict(int)
    num_doc_collection = len(doc_len_arr)
    print words
    #performs normalization to stay consistent with the index
    #(rebuild the list instead of removing items while iterating over it)
    words = [term for term in (normalize(w) for w in words) if term != ""]

    print words
    doc_unranked = get_docs_containing_word(inverted_index, words)

    words_freq_dict = get_frequencies(inverted_index, words)

    for doc in doc_unranked:
        d_length = doc_len_arr[doc]
        res = calculate_rsv(num_doc_collection, words_freq_dict, words, doc,
                            d_length, l_avg)
        result_list[res] = doc
        #adds return value to list where the score is the key and the doc the value
    return result_list
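
calculate_rsv is not shown in this example; presumably it computes the standard BM25 retrieval status value that the loop above uses as a ranking key. A minimal, self-contained sketch of the usual per-term BM25 weight, assuming the common defaults k1 = 1.5 and b = 0.75:

import math

def bm25_term_weight(tf, df, n_docs, doc_len, avg_len, k1=1.5, b=0.75):
    # Standard BM25 weight for one query term in one document: idf times a saturated tf.
    idf = math.log((n_docs - df + 0.5) / (df + 0.5) + 1.0)
    return idf * tf * (k1 + 1.0) / (tf + k1 * (1.0 - b + b * doc_len / avg_len))

Summing this weight over the query terms would give the document's RSV.
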
Example #3
def get_manual_win_df(window_size):
    manual_features_pt = pd.DataFrame.from_csv('./manual/pt_df/unnormalized_pt_features_df.csv')
    manual_labels_pt = pd.DataFrame.from_csv('./manual/pt_df/unnormalized_pt_labels_df.csv')['pt_label'].tolist()
    with open("./manual/pt_df/trip_dict.txt", "rb") as fp:  # Unpickling
        trip_dict = pickle.load(fp)

    # normalize the point features
    manual_features_pt = normalize(manual_features_pt[DL_FEATURES])
    # manual_features_pt is a DataFrame

    print "only collect the manual labelled data with user_id = 1"

    labels_win = cal_win_label_special_trip_dict(manual_labels_pt, window_size, trip_dict, user_id=1)
    features_win = cal_win_features_special_trip_dict(manual_features_pt, window_size, trip_dict, user_id=1)

    # normalize the features for window level
    if len(WIN_FEATURES) > 0:
        features_win = win_normalize(features_win)

    # check whether the features match with labels
    if len(features_win) != len(labels_win):
        logging.warning("the windows features are not matched with labels!!!!!!!!!!!!!!!!!!!!!!")

    manual_win_df = pd.DataFrame(features_win)
    manual_win_df['win_label'] = pd.Series(labels_win)

    # remove the window with label mix
    manual_win_df = manual_win_df[manual_win_df.win_label != 5]
    manual_win_df = manual_win_df[manual_win_df.win_label != -1]
    # now the win_df is unbalanced and has 4 labels
    return manual_win_df
Example #4
def get_app_win_df(window_size):
    app_features_pt = pd.DataFrame.from_csv('./pt_df/unnormalized_pt_features_df.csv')
    app_labels_pt = pd.DataFrame.from_csv('./pt_df/unnormalized_pt_labels_df.csv')['pt_label'].tolist()
    with open("./pt_df/trip_dict.txt", "rb") as fp:  # Unpickling
        trip_dict = pickle.load(fp)

    # normalize the point features
    app_features_pt = normalize(app_features_pt[DL_FEATURES])
    # app_features_pt is a DataFrame

    labels_win = cal_win_label(app_labels_pt, window_size, trip_dict)
    features_win = cal_win_features(app_features_pt, window_size, trip_dict)

    # normalize the features for window level
    if len(WIN_FEATURES) > 0:
        features_win = win_normalize(features_win)

    # check whether the features match with labels
    if len(features_win) != len(labels_win):
        logging.warning("the windows features are not matched with labels!!!!!!!!!!!!!!!!!!!!!!")

    app_win_df = pd.DataFrame(features_win)
    app_win_df['win_label'] = pd.Series(labels_win)

    # remove the window with label mix
    app_win_df = app_win_df[app_win_df.win_label != 5]
    # now the win_df is unbalanced and has 5 labels
    return app_win_df
Example #5
def main(cfg):
    try:
        # nltk.download("vader_lexicon")
        # nltk.download('wordnet')
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        total_files = len(configs)
        results = {}
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            dataset_dir = normalize()
            X, y = extract_features(dataset_dir)
            config_result = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
            glbs.RESULTS[glbs.FILE_NAME] = config_result
            glbs.RESULTS = add_results(glbs.RESULTS, glbs)
            if glbs.EXPORT_AS_BASELINE:
                export_as_baseline(config_result, config[1])
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        add_results_glbs(results, glbs)
        write_results(divide_results(glbs.RESULTS))
        send_work_done(glbs.DATASET_DIR)
        print_message("Done!")
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.DATASET_DIR,
                       "",
                       error=str(e),
                       traceback=str(traceback.format_exc()))
def recognizeFile(models, file, translate='', rotate='', scale=''):
    """!
    Match a single file and return the resulting scores as well as the
    normalization parameters.

    @param models list: The previously trained HMM models
    @param file String: The file containing the motion.
    @param translate String: The normalization type for correcting translation
    @param rotate String: The normalization type for correcting rotation
    @param scale String: The normalization type for correcting scaling
    @return An array of the model scores, translation, rotation, scaling parameters
    """
    #print(file)
    #read motion and normalize
    motion = input.read(file)
    motion,t,r,s = normalization.normalize(motion, translate, rotate, scale)

    plot.addPlot(motion[:,1:4], file)
    #writePointsToGrapherFile(motion, file)

    scores = []
    # compute the motion's score (log-likelihood) under each model
    for i,model in enumerate(models):
        scores.append(float(model.score(motion)))
    return numpy.array(scores), t, r, s
Example #7
def process(infile):
    # Below, a series of text processing functions can be added serially,
    # with the ultimate output returned to the calling function (@see
    # gpio.py's `process_file()` function).
    output = normalization.normalize(infile)
    output = remove_tags.remove(output)
    # The below line will not need to change.
    return output
Example #8
def run_normalization():
    json_content = request.get_json()

    if "array" in json_content:
        array = json_content["array"]
        norm_array = normalize(array)
        return jsonify({"status": "ok", "result": norm_array})

    return jsonify({"status": "failed", "message": "No array"})
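
Assuming the handler above is registered on a Flask app under a route such as /normalize (the decorator is not shown), a client call could look like this; the URL and port are assumptions:

# Hypothetical client for the endpoint above; the route, host, and port are assumptions.
import requests

resp = requests.post("http://localhost:5000/normalize", json={"array": [3, 1, 2]})
print(resp.json())  # e.g. {"status": "ok", "result": [...]}
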
Example #9
def main(cfg):
    try:
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        results = {}
        n_test_dir = ""
        total_files = len(configs)
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            n_train_dir = normalize()
            if glbs.TEST_DIR != "":
                n_test_dir = normalize(test=True)
            train, tr_labels, test, ts_labels, all_features = extract_features(
                n_train_dir, n_test_dir)
            for selection in glbs.SELECTION:
                try:
                    train, test = get_selected_features(
                        selection, train, tr_labels, test, ts_labels,
                        all_features)
                except:
                    pass
            results[glbs.FILE_NAME] = classify(train,
                                               tr_labels,
                                               test,
                                               ts_labels,
                                               all_features,
                                               model_number=i)
            results = add_results(results)
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        write_results(divide_results(results))
        send_work_done(glbs.TRAIN_DIR)
        print_message("Done!")
        # clean_backup_files()
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.TRAIN_DIR,
                       "",
                       error=str(e),
                       traceback=str(traceback.format_exc()))
def score_all_reorderings(references, candidates):
    # Compute average kendall's tau over all sentences.
    assert len(candidates) == len(references)
    scores = {}
    for i in range(len(candidates)):
        reference, candidate = normalize(references[i], candidates[i])
        assert reference and candidate, "Normalization failed!"
        scores[i] = normalized_kendalls_tau(reference, candidate)
    print "Average normalized kendalls tau was %1.3f on %d sentences." % (
        float(sum(scores.values())) / len(scores.keys()), len(scores.keys()))
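
normalized_kendalls_tau is not defined in this snippet. A common choice is one minus the fraction of discordant token pairs; a minimal, self-contained sketch under the assumption that the candidate is a permutation of the reference with unique tokens:

from itertools import combinations

def normalized_kendalls_tau_sketch(reference, candidate):
    # 1 - (# discordant pairs) / (n choose 2); 1.0 means the orderings agree exactly.
    pos = {tok: i for i, tok in enumerate(candidate)}
    order = [pos[tok] for tok in reference]
    pairs = list(combinations(range(len(order)), 2))
    if not pairs:
        return 1.0
    discordant = sum(1 for i, j in pairs if order[i] > order[j])
    return 1.0 - float(discordant) / len(pairs)
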
 def get_ngrams(self, _w, gram_size=2):
     if _w != EPS:
         w = '<' + strip_accents(normalize(_w.lower())) + '>'
         grams = set([
             w[i:i + n] for n in xrange(gram_size, gram_size + 1)
             for i in xrange(len(w)) if w[i:i + n] != '>'
         ])
     else:
         grams = set([])
     return grams
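
As a worked example of the extraction above, and assuming strip_accents and normalize leave plain ASCII unchanged, "Cafe" with gram_size=2 becomes '<cafe>' and yields these bigrams:

w = '<' + "cafe" + '>'
grams = set(w[i:i + 2] for i in range(len(w)) if w[i:i + 2] != '>')
print(grams)  # {'<c', 'ca', 'af', 'fe', 'e>'}  (the lone trailing '>' is filtered out)
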
Example #12
def separate_reps(data_file, exercise, key, column_labels, epsilon=0.15, gamma=20, delta=0.5, beta=1):

	front_cut_values = [0, 0, 0, 25, 0, 50, 0, 25, 50, 100, 0, 100]
	back_cut_values = [0, 0, 0, 0, 25, 0, 50, 25, 50, 0, 100, 100]
	epsilon_values = [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]
	
	if exercise == 'pushup':
		delta = 0.2
		beta = 2

	for iteration in range(0,len(front_cut_values)):
		
		front_cut = front_cut_values[iteration]
		back_cut = back_cut_values[iteration]
		epsilon = epsilon_values[iteration]

		data = []

		#=====[ Format each line of data  ]=====
		# os.path.join('data/raw_data/squat_pushupData_10to20',
		with open(data_file) as f:
			for line in f:
				try:
					if 'Infinity' in line or 'NaN' in line:
						continue
					line = [float(x.replace('\r\n','')) for x in line.split(',')]
					data.append(line)
				except Exception as e:
					continue
					# print e


		#=====[ Make dataframe and readjust indices to take into account front and back cuts  ]=====
		df = pd.DataFrame(data, columns=column_labels)
		df = df[front_cut:df.shape[0]-back_cut]
		df = df.set_index([range(0,df.shape[0])])
			
		y_coords = np.array(df.get(key))
		mins = get_local_mins(y_coords, epsilon, gamma, delta, beta)
		reps = []

		#=====[ Get points from DF between each max found -- constitutes a single rep ]=====
		for index,x in enumerate(mins):
			if(index == len(mins) -1 ):
				continue
			rep = (df.loc[x:mins[index+1]-1]).copy(True)
			reps.append(rep.set_index([range(rep.shape[0])]))

		if len(reps) > 1:
			break

	if exercise == 'squat':
		return nz.normalize(df, reps)
	elif exercise == 'pushup':
		return pnz.normalize(df, reps)
 def action(self, sensorState):
     norm = normalize(sensorState)
     dis = self.discretizer.discretize(norm)
     self.sensorStateHistory.append(dis)
     n = self.problem.required_state_sequence_length()
     if self.problem.goal(self.sensorStateHistory[-n:]):
         print("stop")
         return "stop"
     behavior, _ = self.search.choose_behavior(self.sensorStateHistory, self.graph, self.problem)
     print(behavior)
     return behavior
Example #14
def get_list_of_terms(tokenizedTermList, docId):
    token_list = list()
    # Loops through all the terms in the document and adds them to the list with their associated docId
    for term in tokenizedTermList:
        term = normalize(term)
        if term != '':
            tokenObj = Token(term, docId)
            token_list.append(tokenObj)
            #To remove duplicates uncomment the following
            #term_dict.append(term)
    return token_list
def normalize(f):

    f = normalization.normalize(f)
    f = sent_tokenize(f)
    processed = []

    for u in range(0, len(f)):
        tokens = tokenize(f[u])
        processed.extend(tokens)

    return processed
    def select_population(self, points_per_front, population_size,
                          combined_population_representation,
                          combined_evaluations, target_functions, z_min, z_max,
                          reference_points):

        structured_points, last_front_points = self.generate_structured_points(
            points_per_front, population_size)
        num_objs = len(target_functions)

        if len(structured_points) == population_size:
            #print('Next generation: ', structured_points)
            population_representation = combined_population_representation[
                structured_points]
            evaluations = combined_evaluations[structured_points]

            return population_representation, evaluations

        else:
            #print('Normalization needed.')
            #print('Structured points: ', structured_points)
            #print('Last front points: ', last_front_points)

            num_elements = len(structured_points) - len(last_front_points)
            next_generation = structured_points[:num_elements]
            #print('Next generation: ', next_generation)

            last_front_index = len(next_generation)
            num_K = population_size - len(next_generation)

            #print('Num points to be chosen from last front: ', num_K)

            normalized_evaluations = normalize(combined_evaluations,
                                               structured_points, num_objs,
                                               target_functions, z_min, z_max)

            reference_points_assignment, association_counts_structured_points, association_counts_next_generation, reference_points_perpencidular_distances = \
                associate_dask(structured_points, normalized_evaluations, reference_points, next_generation)

            selected_points = \
                associate_to_niche(structured_points, association_counts_next_generation,
                        reference_points_assignment, reference_points_perpencidular_distances, last_front_points,
                        last_front_index, num_K)

            population_representation = np.concatenate(
                (combined_population_representation[next_generation],
                 combined_population_representation[selected_points]),
                axis=0)
            evaluations = np.concatenate(
                (combined_evaluations[next_generation],
                 combined_evaluations[selected_points]),
                axis=0)

            return population_representation, evaluations
Example #17
def clustering(dic):

    df = pd.read_csv('./data/hateb.csv')
    td = []

    with open("./data/stop.txt", "r") as f:
        stop_list = [v.rstrip() for v in f.readlines() if v != '\n']

    # Split each document into words and collect them in a list,
    # roughly as [([word1, word2, word3], doc_id), ...]
    # words: the words contained in the document (duplicates allowed)
    # tags: identifiers for the document (a list; one document can carry several tags)
    for i in range(len(df)):
        wordlist = parseText(text=str(df['content'][i]), sysdic=dic)
        # unify character variants and absorb spelling / orthographic variation
        normalizedlist = [normalize(word) for word in wordlist]
        # remove stopwords
        stopremovedlist = remove_stopwords(normalizedlist, stop_list)
        td.append(TaggedDocument(words=stopremovedlist, tags=[i]))

    # build the Doc2Vec model
    model = Doc2Vec(documents=td, dm = 1, vector_size=300, window=8, min_count=10, workers=4)

    # store the document vectors in a list
    vectors_list=[model.docvecs[n] for n in range(len(model.docvecs))]

    # list of document numbers
    doc_nums=range(len(model.docvecs))

    # clustering settings
    # change n_clusters if you want a different number of clusters
    n_clusters = 8
    kmeans_model = KMeans(n_clusters=n_clusters, verbose=1, random_state=1, n_jobs=-1)

    # run the clustering
    kmeans_model.fit(vectors_list)

    # label the clustered data
    labels=kmeans_model.labels_

    # build a mapping from cluster label to document numbers
    cluster_to_docs = defaultdict(list)
    for cluster_id, doc_num in zip(labels, doc_nums):
        cluster_to_docs[cluster_id].append(doc_num)

    # print the clusters
    for docs in cluster_to_docs.values():
        print(docs)


    # add a cluster_id column to the DataFrame
    df['cluster_id'] = labels

    df.to_csv('data/hateb_cluster.csv')
Example #18
def insert(data):
    db = get_instance()
    cur = db.cursor()

    data = normalization.normalize(data)

    columns = data.keys()
    values = [data[col] for col in columns]

    stmt = 'INSERT INTO crimes (%s) VALUES %s'

    cur.execute(stmt, (AsIs(','.join(columns)), tuple(values)))
    # print(cur.mogrify(stmt, (AsIs(','.join(columns)), tuple(values))))
def main(img, model_core, model_top, model_bot):
    import cv2
    import numpy as np
    import gary_convert as gc

    import binarize as br
    import segment as sg
    import gaussfun as gau
    import normalization as norm
    heighty, widthx, ch = img.shape
    #cv2.imshow("input",img)
    img1 = np.zeros((heighty, widthx), np.uint8)
    print(ch)
    #*********************************************
    gc.gray_con(heighty, widthx, img)
    for i in range(0, heighty):
        for j in range(0, widthx):
            img1[i][j] = img[i, j, 0]
    print("Grayed")
    #cv2.imshow("Gray conversion",img1)
    #*********************************************
    '''nr.noisered(heighty,widthx,img1)
    print("filtered")'''
    #*********************************************
    gau.gauss(heighty, widthx, img1)
    #cv2.imshow("Gaussian Blurring",img1)
    #*********************************************
    norm.normalize(heighty, widthx, img1)
    print("Normalized")
    #cv2.imshow("contrast stretching",img1)
    #*********************************************
    br.bin_(heighty, widthx, img1)
    print("Binarized")
    #cv2.imshow("binarization",img1)
    #*********************************************
    a = sg.segFun(heighty, widthx, img1, model_core, model_top, model_bot)

    print("Segmented")
    return a
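
The commented-out imshow labels the norm.normalize step as contrast stretching; normalization.py itself is not shown, so the following is only a sketch of what such an in-place linear stretch might look like:

import numpy as np

def contrast_stretch(height, width, img):
    # Linearly stretch grayscale intensities to the full 0-255 range, in place
    # (an assumption about what norm.normalize does in the example above).
    lo, hi = int(img.min()), int(img.max())
    scale = 255.0 / max(hi - lo, 1)
    img[:height, :width] = ((img[:height, :width].astype(np.float32) - lo) * scale).astype(np.uint8)
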
Example #20
    def _resolve_participant(self, finalist):
        search_name = normalize(finalist.Name)
        if finalist.Name in self._name_exceptions:
            search_name = self._name_exceptions[finalist.Name]

        for part in self._participants:
            if part.NormalizedName == search_name:
                part.Participations.append(finalist)
                return part

        new_participant = Participant(finalist)
        new_participant.NormalizedName = search_name # To make exceptions work properly.
        self._participants.append(new_participant)
        return new_participant
def plotFile(file, translate='', rotate='', scale=''):
    """!
    Read a single motion from a file and add it to the current plot list.

    @param file String: The file containing the motion.
    @param translate String: The normalization type for correcting translation
    @param rotate String: The normalization type for correcting rotation
    @param scale String: The normalization type for correcting scaling
    """
    #read motion and normalize
    motion = input.read(file)
    motion,t,r,s = normalization.normalize(motion, translate, rotate, scale)

    plot.addPlot(motion[:,1:4], file)
def datingClassTest():
    hoRatio = 0.10  # fraction of the samples held out for testing
    k = 7
    datingDataMat, datingLabels = getData.file2Matrix(
        '../datas/datingTestSet2.txt')
    normMat = normalization.normalize(datingDataMat)
    m = normMat.shape[0]  # number of rows
    numTestVecs = int(m * hoRatio)  # number of test samples
    errorCount = 0  # number of misclassified samples
    for i in range(numTestVecs):
        classifierResult = KNN.KNN(normMat[i, :], normMat[numTestVecs:m, :],
                                   datingLabels[numTestVecs:m], k)
        if (classifierResult != datingLabels[i]):
            errorCount += 1
    return float(errorCount) / float(numTestVecs)
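
normalization.normalize is not shown here; in this classic kNN dating example it is typically min-max scaling of each feature column to [0, 1]. A minimal sketch under that assumption:

import numpy as np

def min_max_normalize(mat):
    # Scale every column to [0, 1]: (x - column minimum) / column range.
    # Assumes no column is constant (range of zero).
    mins = mat.min(axis=0)
    ranges = mat.max(axis=0) - mins
    return (mat - mins) / ranges
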
    def nextBehavior(self, sensorState):
        print(sensorState, file=self.f)
        normSensorState = normalize(sensorState)
        dis = ",".join(map(str, self.discretizer.discretize(normSensorState)))

        if dis not in self.stateBehaviors:
            self.stateBehaviors[dis] = {}

        behavior = self.selectBehavior(self.stateBehaviors[dis])

        if behavior not in self.stateBehaviors[dis]:
            self.stateBehaviors[dis][behavior] = 0

        self.stateBehaviors[dis][behavior] += 1
        print(self.stateBehaviors)
        print(behavior)
        print(behavior, file=self.f)
        return behavior
Example #24
def main(dir_path, output_dir):
    '''
        Run the pipeline of processes on the files one by one.
    '''
    files = os.listdir(dir_path)

    for file_name in files:

        file_dataframe = pd.read_csv(os.path.join(dir_path, file_name))

        cols = ['high', 'open', 'low', 'close', 'volume', 'adj_close']

        file_dataframe = interpolate(file_dataframe, cols)

        file_dataframe = normalize(file_dataframe, cols)

        file_dataframe.to_csv(os.path.join(output_dir, file_name),
                              encoding='utf-8')
def main(dir_path, output_dir):
    '''
        Run the pipeline of processes on the files one by one.
    '''
    files = os.listdir(dir_path)

    for file_name in files:

        file_dataframe = pd.read_csv(os.path.join(dir_path, file_name))

        cols = ['high', 'open', 'low', 'close', 'volume', 'adj_close']

        file_dataframe = interpolate(file_dataframe, cols)

        file_dataframe = normalize(file_dataframe, cols)

        file_dataframe.to_csv(
            os.path.join(output_dir, file_name), encoding='utf-8')
def createMotionsAndLengths(path, translate='', rotate='', scale=''):
    """!
    Read the motions from a folder and create a concatenated array with the lengths.

    The given directory 'path' must contain a subdirectory 'training' containing the motions
    as individual csv files.

    @param path String: The path to the motion data.
    @param translate: The normalization for translating the motions
    @param rotate: The normalization for rotating the motions
    @param scale: The normalization for scaling the motions
    @return: The concatenated motions and a list of the motion lengths
    """
    # list of motions read from the file
    motions = []
    # length of motions read from the file
    lengths = []
    count = 0
    plot.clearPlot()
    input.logLn('\n- ' + '{:<10}'.format(path + ':'))
    # read all files from directory associated with one motion
    for file in sorted(glob.glob(path + '/training/*.csv')):
        print(file)
        input.logLn(string.basename(string.splitext(file)[0]))
        count += 1
        #read motion and normalize
        motion = input.read(file)
        motion,t,r,s = normalization.normalize(motion, translate, rotate, scale)

        # Add to plot all training plots
        plot.addPlot(motion[:,1:4], file)

        # Add motion to the list of motions
        motions.append(motion)
        # Add length (number of poses in motion) to the list of lengths
        lengths.append(len(motion))
    _, folderName = string.split(string.dirname(path))
    # Plot all training motions
    plot.plot('../plots/' + folderName + ' training')
    # The observations are a list of poses
    X = numpy.concatenate(motions)
    return X, lengths
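
The concatenated array plus the list of lengths matches the interface of hmmlearn-style HMMs, although hmmlearn itself is not imported anywhere in this example; a small, self-contained sketch of that interface with toy data:

# Sketch only: assumes an hmmlearn-style model, which this project may or may not use.
import numpy as np
from hmmlearn.hmm import GaussianHMM

motions = [np.random.rand(30, 4), np.random.rand(45, 4)]  # two toy motions, 4 values per pose
X = np.concatenate(motions)
lengths = [len(m) for m in motions]
model = GaussianHMM(n_components=3, n_iter=10).fit(X, lengths)
print(model.score(motions[0]))  # log-likelihood of a single motion, as used in recognizeFile
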
Example #27
def get_manual_win_df(window_size):
    manual_features_pt = pd.DataFrame.from_csv(
        './manual/pt_df/unnormalized_pt_features_df.csv')
    manual_labels_pt = pd.DataFrame.from_csv(
        './manual/pt_df/unnormalized_pt_labels_df.csv')['pt_label'].tolist()
    with open("./manual/pt_df/trip_dict.txt", "rb") as fp:  # Unpickling
        trip_dict = pickle.load(fp)

    # normalize the point features
    manual_features_pt = normalize(manual_features_pt[DL_FEATURES])
    # manual_features_pt is a DataFrame

    print "only collect the manual labelled data with user_id = 1"

    labels_win = cal_win_label_special_trip_dict(manual_labels_pt,
                                                 window_size,
                                                 trip_dict,
                                                 user_id=1)
    features_win = cal_win_features_special_trip_dict(manual_features_pt,
                                                      window_size,
                                                      trip_dict,
                                                      user_id=1)

    # normalize the features for window level
    if len(WIN_FEATURES) > 0:
        features_win = win_normalize(features_win)

    # check whether the features match with labels
    if len(features_win) != len(labels_win):
        logging.warning(
            "the window features do not match the labels!"
        )

    manual_win_df = pd.DataFrame(features_win)
    manual_win_df['win_label'] = pd.Series(labels_win)

    # remove the window with label mix
    manual_win_df = manual_win_df[manual_win_df.win_label != 5]
    manual_win_df = manual_win_df[manual_win_df.win_label != -1]
    # now the win_df is unbalanced and has 4 labels
    return manual_win_df
    def nextBehavior(self, sensorState):
        self.round += 1
        print("\nRound:", self.round)
        print("Algorithm:", self.algorithmState)
        print(sensorState, file=self.f)
        normSensorState = normalize(sensorState)
        discretized = self.discretizer.discretize(normSensorState)
        dis = self.graph.state_to_key(discretized)

        if self.previous_state is not None and self.previous_behavior is not None:
            self.graph.construct([self.previous_state, discretized], [self.previous_behavior])

        if dis not in self.stateBehaviors:
            self.sinceLastNewState = 0
            self.stateBehaviors[dis] = {}
            self.stateBehaviors[dis]["count"] = 0
            self.stateBehaviors[dis]["origin"] = discretized

        behavior = "explore"
        if self.algorithmState == "random":
            behavior = self.randomBehavior(dis, discretized)
        elif self.algorithmState == "intelligent":
            behavior = self.intelligentBehavior(dis, discretized)
        elif self.algorithmState == "search":
            behavior = self.searchBehavior(dis, discretized)

        if behavior not in self.stateBehaviors[dis]:
            self.stateBehaviors[dis][behavior] = 0

        self.stateBehaviors[dis][behavior] += 1
        self.stateBehaviors[dis]["count"] += 1
        print(self.stateBehaviors)
        print("State:", dis)
        print("Behavior:", behavior)
        print(behavior, file=self.f)
        self.previous_state = discretized
        self.previous_behavior = behavior
        self.graph.visualize("dot/graph.dot", self.initialState)
        self.graph.visualize("dot/graph_%d.dot" % self.round, self.initialState)
        return behavior
def searchAnd(dict, terms):
    tempDict = defaultdict(list)
    result = []
    for term in terms:
        term = normalize(term)
        if (term != ""):
            tempDict[term].extend(
                dict[term])  #will get the postings list for each term
            if (len(tempDict[term]) == 0):
                return "The word you are looking for could not be found."

    # find the smallest index
    smallest_term = findSmallestList(tempDict)
    result.extend(tempDict[smallest_term])
    del (tempDict[smallest_term])

    while (len(tempDict) > 0):
        smallest_term = findSmallestList(tempDict)
        result = intersect(result, deepcopy(tempDict[smallest_term]))
        del (tempDict[smallest_term])

    return result
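
A small worked illustration of the merge order used above, with hypothetical postings lists: starting from the shortest list keeps the running intersection as small as possible.

# Hypothetical postings; not from this project's index.
postings = {"cat": [1, 4, 7, 9], "hat": [4, 9, 20], "mat": [2, 4, 9, 11, 30]}

by_length = sorted(postings, key=lambda t: len(postings[t]))
result = set(postings[by_length[0]])          # start with the shortest list ("hat")
for term in by_length[1:]:
    result &= set(postings[term])
print(sorted(result))  # [4, 9]
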
Example #30
def main():
    dic = {}
    normalization = []
    bunsho_file = sys.argv[1]
    data, book_list = make_jp_data(bunsho_file)
    for i in range(len(data)):
        tfidf, word_list_i = calc_tfidf(data[i], data)
        word_for_normalization, list_for_normalization = make_normalization_data(
            data)
        normalization.append(
            normalize(word_list_i, tfidf, word_for_normalization,
                      list_for_normalization))

    print("\n\n===============similarity===============")
    similarity = calc_cosine_similarity(normalization)
    print(similarity)

    print("\n\n===============recommendation================")
    recommendation = recommend(similarity, book_list)
    if len(recommendation) == 0:
        print("There are no recommendation in the data.")
    else:
        for i, v in enumerate(recommendation.values()):
            print(i + 1, v)
 def test_normal(self):
     self.assertEqual(normalization.normalize("This is a normal sentence."), "This is a normal sentence.")
Example #32
def summarize(doc_list):

    stop_words = list(set(stopwords.words("english")))

    f = open('data/text_doc.json', 'r')
    text = json.load(f)
    f.close()

    f = open('data/tfidf_index.json', 'r')
    indices = json.load(f)
    f.close()

    all_summaries = dict()

    def row_normalize(A):

        return (A.transpose() * (1 / A.sum(1))).transpose()

    def idf(word):

        stemmer = PorterStemmer()
        word = stemmer.stem(word)

        #global indices
        if word in indices:
            idf_value = indices[word].values()[0][1]
        else:
            idf_value = 0.0000001
        return idf_value

    def idf_modified_cosine(str1, str2):

        x = TE.tokenize(str1)
        y = TE.tokenize(str2)

        num = den1 = den2 = 0.0

        for w in list(set(x + y)):
            if w not in stop_words:
                num += x.count(w) * y.count(w) * (idf(w)**2)

        for xi in x:
            if xi not in stop_words:
                den1 += (x.count(xi) * idf(xi))**2

        for yi in y:
            if yi not in stop_words:
                den2 += (y.count(yi) * idf(yi))**2

        result = num / np.sqrt(den1 * den2)
        return result

    def create_centrality_matrix(sentences):

        N = len(sentences)

        centrality_matrix = np.zeros([N, N])

        for i, senti in enumerate(sentences):
            for j, sentj in enumerate(sentences):
                centrality_matrix[i][j] = idf_modified_cosine(senti, sentj)

        centrality_matrix = row_normalize(centrality_matrix)

        return centrality_matrix

    def LexRank(str):

        sentences = sent_tokenize(str)
        N = len(sentences)
        M = create_centrality_matrix(sentences)
        d = 0.85

        page_rank = np.ones(N) / N
        degree = np.zeros(N)

        for u in range(N):
            for v in range(N):
                if u != v and M[u][v] > 0:
                    degree[u] += M[u][v]

        for i in range(100):
            for u in range(N):
                rank = 0
                for v in range(N):
                    if M[u][v] > 0 and u != v:
                        rank += (M[u][v] * page_rank[v] / degree[v])
                page_rank[u] = (1 - d) / N + d * rank

        summary = ''

        for i in heapq.nlargest(2, range(N), page_rank.take):
            summary += sentences[i]

        return summary

    for i, doc in enumerate(doc_list):

        txt = text[doc]
        docname = doc

        summary = LexRank(normalization.normalize(txt))
        all_summaries[docname] = summary

        print "File ", i + 1, " : ", docname, " done."

    return all_summaries

    print("Summarization Done")
        '1.0, 1.0, 0.0': 'facing object',
        '0.0, 0.0, 1.0': 'object grabbed',
        '1.0, 0.0, 1.0': 'object grabbed and object in front',
        '1.0, 1.0, 1.0': 'object grabbed and facing object',
        }

if __name__ == '__main__':
    plt.rcParams.update({'figure.autolayout': True})
    filename = sys.argv[1]
    sensorStates, behaviors = dataloader.load(filename)
    discretizer = SimplifyingDiscretizer()
    graph = MarkovChainGraph()
    result = {}
    ss = []
    for sensorState in sensorStates:
        sensorState = normalize(sensorState)
        s = discretizer.discretize(sensorState)
        s = graph.state_to_key(s)
        ss.append(stateTranslate[s])
        if stateTranslate[s] in result:
            result[stateTranslate[s]] += 1
        else:
            result[stateTranslate[s]] = 1

    graph.construct(ss, behaviors)
    graph.visualize(filename + '.dot', ss[0])

    dictionary = plt.figure()

    axes = plt.gca()
    axes.set_ylim([0,1000])
Example #34
# -*- coding: utf-8 -*-

import numpy as np

import loaddata as ld

import gradientdescent as GD

import normalization as norm

X, Y, n = ld.load('data.txt')

X = norm.normalize(X, n)

X = X.reshape((n * 2))

tmp = []
for i in xrange(0, 2 * n, 2):  # "Tear my hands off" (i.e., this loop is ugly)
    tmp.append(1)
    tmp.append(X[i])
    tmp.append(X[i + 1])

X = np.array(tmp).reshape(n, 3)

print X

alpha = 0.01
iterations = 400

theta = np.zeros((3, 1))#init fitting params
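
The loop above rebuilds X with a bias column element by element; the same design matrix can be built directly with NumPy. A small self-contained sketch with toy data (not the file loaded above):

import numpy as np

features = np.arange(8, dtype=float).reshape(4, 2)  # toy data: 4 samples, 2 features each
design = np.hstack((np.ones((features.shape[0], 1)), features))
print(design)  # every row is [1, x1, x2], matching what the loop above builds
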
Example #35
	def __init__(self, finalist):
		self.Participations = [finalist]
		self.NormalizedName = normalize(self.Name)
    def process(self,
                subject,
                cap,
                mon,
                device,
                gaze_network,
                por_available=False,
                show=False):

        g_t = None
        data = {
            'image_a': [],
            'gaze_a': [],
            'head_a': [],
            'R_gaze_a': [],
            'R_head_a': []
        }
        if por_available:
            f = open('./%s_calib_target.pkl' % subject, 'rb')
            targets = pickle.load(f)

        frames_read = 0
        ret, img = cap.read()
        while ret:
            img = self.undistorter.apply(img)
            if por_available:
                g_t = targets[frames_read]
            frames_read += 1

            # detect face
            face_location = face.detect(img, scale=0.25, use_max='SIZE')

            if len(face_location) > 0:
                # use kalman filter to smooth bounding box position
                # assume work with complex numbers:
                output_tracked = self.kalman_filters[0].update(
                    face_location[0] + 1j * face_location[1])
                face_location[0], face_location[1] = np.real(
                    output_tracked), np.imag(output_tracked)
                output_tracked = self.kalman_filters[1].update(
                    face_location[2] + 1j * face_location[3])
                face_location[2], face_location[3] = np.real(
                    output_tracked), np.imag(output_tracked)

                # detect facial points
                pts = self.landmarks_detector.detect(face_location, img)
                # run Kalman filter on landmarks to smooth them
                for i in range(68):
                    kalman_filters_landm_complex = self.kalman_filters_landm[
                        i].update(pts[i, 0] + 1j * pts[i, 1])
                    pts[i, 0], pts[i, 1] = np.real(
                        kalman_filters_landm_complex), np.imag(
                            kalman_filters_landm_complex)

                # compute head pose
                fx, _, cx, _, fy, cy, _, _, _ = self.cam_calib['mtx'].flatten()
                camera_parameters = np.asarray([fx, fy, cx, cy])
                rvec, tvec = self.head_pose_estimator.fit_func(
                    pts, camera_parameters)

                ######### GAZE PART #########

                # create normalized eye patch and gaze and head pose value,
                # if the ground truth point of regard is given
                head_pose = (rvec, tvec)
                por = None
                if por_available:
                    por = np.zeros((3, 1))
                    por[0] = g_t[0]
                    por[1] = g_t[1]
                entry = {
                    'full_frame':
                    img,
                    '3d_gaze_target':
                    por,
                    'camera_parameters':
                    camera_parameters,
                    'full_frame_size': (img.shape[0], img.shape[1]),
                    'face_bounding_box':
                    (int(face_location[0]), int(face_location[1]),
                     int(face_location[2] - face_location[0]),
                     int(face_location[3] - face_location[1]))
                }
                [patch, h_n, g_n, inverse_M, gaze_cam_origin,
                 gaze_cam_target] = normalize(entry, head_pose)

                # cv2.imshow('raw patch', patch)

                def preprocess_image(image):
                    ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
                    ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
                    image = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2RGB)
                    # cv2.imshow('processed patch', image)

                    image = np.transpose(image, [2, 0, 1])  # CxHxW
                    image = 2.0 * image / 255.0 - 1
                    return image

                # estimate the PoR using the gaze network
                processed_patch = preprocess_image(patch)
                processed_patch = processed_patch[np.newaxis, :, :, :]

                # Functions to calculate relative rotation matrices for gaze dir. and head pose
                def R_x(theta):
                    sin_ = np.sin(theta)
                    cos_ = np.cos(theta)
                    return np.array([[1., 0., 0.], [0., cos_, -sin_],
                                     [0., sin_, cos_]]).astype(np.float32)

                def R_y(phi):
                    sin_ = np.sin(phi)
                    cos_ = np.cos(phi)
                    return np.array([[cos_, 0., sin_], [0., 1., 0.],
                                     [-sin_, 0., cos_]]).astype(np.float32)

                def calculate_rotation_matrix(e):
                    return np.matmul(R_y(e[1]), R_x(e[0]))

                def pitchyaw_to_vector(pitchyaw):

                    vector = np.zeros((3, 1))
                    vector[0, 0] = np.cos(pitchyaw[0]) * np.sin(pitchyaw[1])
                    vector[1, 0] = np.sin(pitchyaw[0])
                    vector[2, 0] = np.cos(pitchyaw[0]) * np.cos(pitchyaw[1])
                    return vector

                # compute the ground truth POR if the
                # ground truth is available
                R_head_a = calculate_rotation_matrix(h_n)
                R_gaze_a = np.zeros((1, 3, 3))
                if type(g_n) is np.ndarray:
                    R_gaze_a = calculate_rotation_matrix(g_n)

                    # verify that g_n can be transformed back
                    # to the screen's pixel location shown
                    # during calibration
                    gaze_n_vector = pitchyaw_to_vector(g_n)
                    gaze_n_forward = -gaze_n_vector
                    g_cam_forward = inverse_M * gaze_n_forward

                    # compute the POR on z=0 plane
                    d = -gaze_cam_origin[2] / g_cam_forward[2]
                    por_cam_x = gaze_cam_origin[0] + d * g_cam_forward[0]
                    por_cam_y = gaze_cam_origin[1] + d * g_cam_forward[1]
                    por_cam_z = 0.0

                    x_pixel_gt, y_pixel_gt = mon.camera_to_monitor(
                        por_cam_x, por_cam_y)
                    # verified for correctness of calibration targets

                input_dict = {
                    'image_a': processed_patch,
                    'gaze_a': g_n,
                    'head_a': h_n,
                    'R_gaze_a': R_gaze_a,
                    'R_head_a': R_head_a,
                }
                if por_available:
                    data['image_a'].append(processed_patch)
                    data['gaze_a'].append(g_n)
                    data['head_a'].append(h_n)
                    data['R_gaze_a'].append(R_gaze_a)
                    data['R_head_a'].append(R_head_a)

                if show:

                    # compute eye gaze and point of regard
                    for k, v in input_dict.items():
                        input_dict[k] = torch.FloatTensor(v).to(
                            device).detach()

                    gaze_network.eval()
                    output_dict = gaze_network(input_dict)
                    output = output_dict['gaze_a_hat']
                    g_cnn = output.data.cpu().numpy()
                    g_cnn = g_cnn.reshape(3, 1)
                    g_cnn /= np.linalg.norm(g_cnn)

                    # compute the POR on z=0 plane
                    g_n_forward = -g_cnn
                    g_cam_forward = inverse_M * g_n_forward
                    g_cam_forward = g_cam_forward / np.linalg.norm(
                        g_cam_forward)

                    d = -gaze_cam_origin[2] / g_cam_forward[2]
                    por_cam_x = gaze_cam_origin[0] + d * g_cam_forward[0]
                    por_cam_y = gaze_cam_origin[1] + d * g_cam_forward[1]
                    por_cam_z = 0.0

                    x_pixel_hat, y_pixel_hat = mon.camera_to_monitor(
                        por_cam_x, por_cam_y)

                    output_tracked = self.kalman_filter_gaze[0].update(
                        x_pixel_hat + 1j * y_pixel_hat)
                    x_pixel_hat, y_pixel_hat = np.ceil(
                        np.real(output_tracked)), np.ceil(
                            np.imag(output_tracked))

                    # show point of regard on screen
                    display = np.ones((mon.h_pixels, mon.w_pixels, 3),
                                      np.float32)
                    h, w, c = patch.shape
                    display[0:h,
                            int(mon.w_pixels / 2 -
                                w / 2):int(mon.w_pixels / 2 +
                                           w / 2), :] = 1.0 * patch / 255.0
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    if type(g_n) is np.ndarray:
                        cv2.putText(display, '.', (x_pixel_gt, y_pixel_gt),
                                    font, 0.5, (0, 0, 0), 10, cv2.LINE_AA)
                    cv2.putText(display, '.',
                                (int(x_pixel_hat), int(y_pixel_hat)), font,
                                0.5, (0, 0, 255), 10, cv2.LINE_AA)
                    cv2.namedWindow("por", cv2.WINDOW_NORMAL)
                    cv2.setWindowProperty("por", cv2.WND_PROP_FULLSCREEN,
                                          cv2.WINDOW_FULLSCREEN)
                    cv2.imshow('por', display)

                    # also show the face:
                    cv2.rectangle(
                        img, (int(face_location[0]), int(face_location[1])),
                        (int(face_location[2]), int(face_location[3])),
                        (255, 0, 0), 2)
                    self.landmarks_detector.plot_markers(img, pts)
                    self.head_pose_estimator.drawPose(img, rvec, tvec,
                                                      self.cam_calib['mtx'],
                                                      np.zeros((1, 4)))
                    cv2.imshow('image', img)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        cap.release()
                        break

            # read the next frame
            ret, img = cap.read()

        return data
def evaluate(minority_label,
             majority_label,
             training_data, training_target,
             test_data, test_true_target,
             clf,
             p_synthetic_samples = None,
             p_majority_samples = None):
    '''
    Parameters
    ----------
    minority_label :
    majority_label :
    p_synthetic_samples : Sets parameter N for SMOTE. Tells how many synthetic samples are
                          supposed to be generated.
                          If not None, SMOTE is applied.
    p_majority_samples : Sets how many majority samples should be used.
                         n_majority_samples = p_majority_samples/100 * n_minority_samples.
                         If None, no under-sampling is done.
    '''

    #Normalize training data
    theta, sigma = calculate_mean_and_std_deviation(training_data)
    training_data = normalize(training_data, theta, sigma)
            
    #just train and test on labels
    minor_mask = (training_target == samples.get_target_number(minority_label))
    major_mask = (training_target == samples.get_target_number(majority_label))
    minority_samples = training_data[minor_mask]
    majority_samples = training_data[major_mask]
    
    minority_target = training_target[minor_mask]
    majority_target = training_target[major_mask]
    
    training_sizes = {minority_label: minority_samples.shape[0], 
                    majority_label: majority_samples.shape[0]}
    
    #Under-sampling
    if p_majority_samples is not None:
        logger.info("Under-sample majority class...")    
        n_majority_samples = p_majority_samples / 100 * minority_samples.shape[0]
        np.random.shuffle(majority_samples)
        majority_samples = majority_samples[:n_majority_samples]
        
        logger.info("Selected %d random majority samples." 
                    % majority_samples.shape[0])
        majority_target = np.empty(shape=(majority_samples.shape[0]))
        majority_target[:] = samples.get_target_number(majority_label)
    
    #SMOTE
    if p_synthetic_samples is not None:
        logger.info("SMOTE minority class...")
        
        #Create synthetic data and target
        synthetic_minor_samples = SMOTE(minority_samples, p_synthetic_samples, k = 5)
        synthetic_targets = np.empty(shape=(synthetic_minor_samples.shape[0]))
        synthetic_targets[:] = samples.get_target_number(minority_label)
        
        logger.info("Created %d synthetic minority samples from %d samples with N = %d." 
                    % (synthetic_minor_samples.shape[0], minority_samples.shape[0],
                       p_synthetic_samples))
        
        #Add synthetic data and target
        minority_samples = np.concatenate((minority_samples, synthetic_minor_samples))
        minority_target = np.concatenate((minority_target, synthetic_targets))
        
    #Put minorities and majorities together
    training_data = np.concatenate((minority_samples,majority_samples))
    training_target = np.concatenate((minority_target,majority_target))
    
    #Train
    logger.info("Train classifier...")
    clf.fit(training_data, training_target)

    #Just use targets for labels
    mask = (test_true_target == samples.get_target_number(minority_label))
    neg_mask = (test_true_target == samples.get_target_number(majority_label))
    
    evaluation_sizes = {minority_label: np.sum(mask), 
                        majority_label: np.sum(neg_mask)}
    
    test_data = np.concatenate((test_data[mask],test_data[neg_mask]))
    test_true_target = np.concatenate((test_true_target[mask],test_true_target[neg_mask]))
    
    #Normalize test data
    test_data = normalize(test_data, theta, sigma)
    
    test_predicted_target = clf.predict(test_data)
                
    logger.debug("Predicted classes: %s" % unicode(np.unique(test_predicted_target)))
    logger.debug("%d, %d" % (np.sum(test_predicted_target == samples.get_target_number(minority_label)), 
                             np.sum(test_predicted_target == samples.get_target_number(majority_label))))
                
    #Score test data, target
    logger.info("Calculate F1 Score...")
    precisions, recalls, f1_scores, _ = metrics.precision_recall_fscore_support(test_true_target, 
                                                                                test_predicted_target,
                                                                                pos_label = None)
    
    for precision, recall, f1_score, label \
    in izip(precisions, recalls, f1_scores, [minority_label, majority_label]):
        logger.info("%s: Recall = %.5f, Precision = %.5f, F1 Score = %.5f" % 
                    (label, recall, precision, f1_score))
        
    return precisions, recalls, f1_scores, evaluation_sizes, training_sizes
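
SMOTE is imported from elsewhere in this project; conceptually it creates each synthetic minority sample by interpolating between a real sample and one of its k nearest minority neighbours. A minimal, self-contained sketch of that idea, not the project's implementation:

import numpy as np

def smote_sketch(minority, n_per_sample=1, k=5, seed=0):
    # Toy SMOTE: each synthetic point is x + u * (neighbour - x) with u ~ U(0, 1),
    # where the neighbour is one of x's k nearest minority neighbours.
    # Assumes at least two minority samples.
    rng = np.random.default_rng(seed)
    synthetic = []
    for x in minority:
        dists = np.linalg.norm(minority - x, axis=1)
        neighbours = minority[np.argsort(dists)[1:k + 1]]  # closest first, skipping x itself
        for _ in range(n_per_sample):
            nb = neighbours[rng.integers(len(neighbours))]
            synthetic.append(x + rng.random() * (nb - x))
    return np.array(synthetic)
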
 def getInitialState(self, sensorState):
     self.initialState = self.discretizer.discretize(normalize(sensorState))
     return self.nextBehavior(sensorState)
Example #39
 def __init__(self, name, provterr=None):
     self.Name = name
     self.NormalizedName = normalize(name)
     self.ProvTerr = provterr
     self.FirstSeenYear = 9999
     self.LastSeenYear = 0
import tensorflow as tf
import numpy as np
import time
import batch
import normalization as norm
import discrimination as dsc

train = np.load('trainset.npy')
test = np.load('testset.npy')

trainSet = train[()]
trainFeatures = trainSet['features'].astype('float32').reshape(-1, 28, 28, 1)
trainFeatures = norm.normalize(trainFeatures)
trainLabels = trainSet['labels']

#print(trainFeatures[0])
#print(norm.normalize(trainFeatures)[0])

testSet = test[()]
testFeatures = testSet['features'].astype('float32').reshape(-1, 28, 28, 1)
testFeatures = norm.normalize(testFeatures)
testLabels = testSet['labels']

numOfFeatures = len(trainFeatures[0][0])
numOfLabels = len(trainLabels[0])
learning_rate = 0.002225
training_epochs = 100
batch_size = 32
train_keep_prob = 0.5

X = tf.placeholder(
Example #41
def norm_word(w):
    if w == NULL:
        return w
    n_w = strip_accents(normalize(w))
    return n_w
def separate_reps(data_file,
                  exercise,
                  key,
                  column_labels,
                  epsilon=0.15,
                  gamma=20,
                  delta=0.5,
                  beta=1):

    front_cut_values = [0, 0, 0, 25, 0, 50, 0, 25, 50, 100, 0, 100]
    back_cut_values = [0, 0, 0, 0, 25, 0, 50, 25, 50, 0, 100, 100]
    epsilon_values = [
        0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25
    ]

    if exercise == 'pushup':
        delta = 0.2
        beta = 2

    for iteration in range(0, len(front_cut_values)):

        front_cut = front_cut_values[iteration]
        back_cut = back_cut_values[iteration]
        epsilon = epsilon_values[iteration]

        data = []

        #=====[ Format each line of data  ]=====
        # os.path.join('data/raw_data/squat_pushupData_10to20',
        with open(data_file) as f:
            for line in f:
                try:
                    if 'Infinity' in line or 'NaN' in line:
                        continue
                    line = [
                        float(x.replace('\r\n', '')) for x in line.split(',')
                    ]
                    data.append(line)
                except Exception as e:
                    continue
                    # print e

        #=====[ Make dataframe and readjust indices to take into account front and back cuts  ]=====
        df = pd.DataFrame(data, columns=column_labels)
        df = df[front_cut:df.shape[0] - back_cut]
        df = df.set_index([range(0, df.shape[0])])

        y_coords = np.array(df.get(key))
        mins = get_local_mins(y_coords, epsilon, gamma, delta, beta)
        reps = []

        #=====[ Get points from DF between each max found -- constitutes a single rep ]=====
        for index, x in enumerate(mins):
            if (index == len(mins) - 1):
                continue
            rep = (df.loc[x:mins[index + 1] - 1]).copy(True)
            reps.append(rep.set_index([range(rep.shape[0])]))

        if len(reps) > 1:
            break

    if exercise == 'squat':
        return nz.normalize(df, reps)
    elif exercise == 'pushup':
        return pnz.normalize(df, reps)
Example #43
import time
import batch
import normalization as norm
import discrimination as dsc
device_name = tf.test.gpu_device_name()

#train = np.load('train_set.npy')
#test = np.load('imageset2.npy')
#valid = np.load('test_set.npy')

imgSize = 62

#testSet = test[()]
testSet = image_set
testFeatures = testSet['features'].astype('float32').reshape(-1,imgSize,imgSize,1)
testFeatures = norm.normalize(testFeatures)
#testLabels = testSet['labels']


numOfFeatures = 62
numOfLabels = 20
learning_rate = 0.000225
training_epochs = 100
batch_size = 64
train_keep_prob = 0.5
tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, numOfFeatures, numOfFeatures, 1])# X : placeholder for features
Y = tf.placeholder(tf.float32,[None, numOfLabels])# Y : placeholder for labels
keep_prob = tf.placeholder(tf.float32, None)# placeholder for dropout_rate
    training_set = json.load(f)

X = np.array(training_set["datapoint"])

# number of training examples
m = np.size(X, axis=0)

# number of classes in output
k = training_set["num_classes"]

# number of input features excluding bias node
n = np.size(X, axis=1)

# X is a matrix with m rows
# each row containing the features including the bias node
normalize(X)
X = np.hstack((np.ones((m, 1)), X))

# Y is a matrix with m rows
# each row with 0 or 1 for each output class
Y = np.zeros((m, k))
for i in range(m):
    Y[i][training_set["label"][i]] = 1

# parameters for multiplying with first and second layers
h = n + 2  # hidden layer size
Th1 = np.random.rand(n + 1, h - 1)
Th2 = np.random.rand(h, k)

lamb = 0.01  # regularization parameter
niter = 100000  # number of iterations for learning