def main():
    """Extract features from each recording, train the model and plot the last one.

    For actors 1..24 and recordings 5..60 the function builds the path of the
    ``.wav`` file, calls ``emotions`` and ``emotion_second`` on it, feeds the
    fourth value returned by ``emotions`` to ``get_diag`` and records the
    entropy, duration and tempo.  The collected table and diagrams are passed
    to ``machine_learning``; finally the signal and embedding of the *last*
    processed recording are plotted.
    """
    dir_name = 'Audio_Speech_Actors_01-24-2/'
    base_filename = 'Actor_'
    names = 'Actor'
    # Build the address of every recording in the dataset.
    file_diagram = [
        '{0}{1}{2}/{3}{2}_{4}.wav'.format(dir_name, base_filename, i, names, j)
        for i, j in itertools.product(range(1, 25), range(5, 61))
    ]

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call.
    rows = []
    diagramas = []
    for item in file_diagram:
        # emotions() yields the embedding, the duration in seconds, the tempo
        # and a fourth value that is handed to get_diag().
        embedder, dur_sec, tempo, yk = emotions(item)
        signal = emotion_second(item)
        entropy, diagramas = get_diag(yk, diagramas)

        rows.append({
            'Entropy': float(entropy[0]),
            'Duracion': dur_sec,
            'Tempo': tempo,
        })
        print(item)

    d = pd.DataFrame(rows, columns=['Entropy', 'Duracion', 'Tempo'])
    machine_learning(d, diagramas)

    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15, 14))
    # Plot the time series and its 2-D embedding.  The original code read
    # ``Signal``; only the lowercase ``signal`` is assigned in this function.
    ax[0].plot(signal)
    ax[1].plot(embedder[0, :], embedder[1, :])
    ax[2].plot(signal)
    # Plot the 3-D embedding in the third slot of the figure.
    # NOTE(review): ``embedder3`` is not assigned anywhere in this function; it
    # must exist at module level or this line raises NameError - confirm where
    # the 3-D embedding is supposed to come from.
    ax3d = fig.add_subplot(3, 1, 3, projection='3d')
    ax3d.plot(embedder3[0, :], embedder3[1, :], embedder3[2, :])
# Example no. 2
# 0
def work_flow():
    """Extract features from every recording and train the model on them.

    For actors 1..24 and recordings 1..60 the function builds the path of the
    ``.wav`` file, calls ``emotions`` on it, passes the resulting embedding to
    ``get_diag`` and records the entropy, duration and tempo.  Recordings that
    fail at any of these steps are reported and skipped.  The collected table
    and diagrams are then handed to ``machine_learning``.
    """
    # Location of the data used by the script.
    dir_name = 'Audio_Speech_Actors_01-24-2/'
    base_filename = 'Actor_'
    names = 'Actor'

    # Build the address of every recording in the dataset.
    wav_paths = [
        '{0}{1}{2}/{3}{2}_{4}.wav'.format(dir_name, base_filename, i, names, j)
        for i, j in itertools.product(range(1, 25), range(1, 61))
    ]

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call.
    rows = []
    diagramas = []
    for item in wav_paths:
        try:
            # emotions() yields the embedding, duration in seconds and tempo.
            embedder, dur_sec, tempo = emotions(item)
            entropy, diagramas = get_diag(embedder, diagramas)
            row = {
                'Entropy': float(entropy[0]),
                'Duracion': dur_sec,
                'Tempo': tempo,
            }
        except Exception as exc:
            # Processing stays best-effort, but the failure is reported rather
            # than silently dropped, and KeyboardInterrupt / SystemExit are no
            # longer swallowed as they were by the bare ``except``.
            print('Skipping {}: {!r}'.format(item, exc))
            continue

        rows.append(row)
        print(item)

    d = pd.DataFrame(rows, columns=['Entropy', 'Duracion', 'Tempo'])
    machine_learning(d, diagramas)
# Example no. 3
# 0
    def post(self):
        """Register a newly posted house, predict its price and persist the result.

        The JSON request body must contain ``Identifier`` and ``UserID``; every
        other key must be a field of ``house_model``, and the ``Suburb``,
        ``Street``, ``Type`` and ``Regionname`` values must appear in their
        module-level allow-lists.  A "Predict" entry is appended to
        ``log_file.csv`` and, on success, the house together with its
        ``Predicted_Price`` is written back to ``user_houses.csv``.

        Returns:
            A ``(body, status)`` tuple: 200 with the house id, the predicted
            price and a message on success; 400 with a ``message`` when the
            body is not a JSON object, a required key is missing, the
            identifier already exists, or a property name or value is rejected.
        """
        feature_columns = [
            "Identifier", "UserID", "Distance", "Bedroom", "Bathroom", "Car",
            "Landsize", "BuildingArea", "YearBuilt", "Lattitude", "Longtitude",
            "Suburb", "Street", "Type", "Regionname"
        ]

        # Read the houses already stored for all users.
        df = pd.read_csv("user_houses.csv",
                         usecols=feature_columns + ["Predicted_Price"])
        df.set_index(["Identifier"], inplace=True)
        print("###################df####################")
        print(df.to_string())

        # The body of the request describes the new house.
        house = request.json
        if not isinstance(house, dict):
            # Without this guard a missing/non-object body crashed below with
            # a TypeError (HTTP 500).
            return {"message": "Request body must be a JSON object."}, 400
        if "UserID" not in house:
            # The log entry below needs the user id; previously its absence
            # raised KeyError (HTTP 500).
            return {"message": "UserID is missing."}, 400

        # Append a "Predict" entry to the operation log.  This deliberately
        # happens before the remaining validation, as in the original flow.
        df_log = pd.read_csv("log_file.csv",
                             usecols=["ID", "UserID", "Operation", "Time"])
        df_log.set_index(["ID"], inplace=True)
        print(df_log.to_string())
        get_time = time.asctime(time.localtime(time.time()))
        log_id = 0 if df_log.empty else df_log.index[-1] + 1
        df_log.loc[log_id, "UserID"] = house["UserID"]
        df_log.loc[log_id, "Operation"] = "Predict"
        df_log.loc[log_id, "Time"] = get_time
        print(df_log.to_string())
        df_log.to_csv("log_file.csv")

        if "Identifier" not in house:
            return {"message": "Identifier is missing."}, 400

        # Identifier of the house the user just posted.
        house_id = house["Identifier"]

        # Reject identifiers that are already taken.
        if house_id in df.index:
            return {
                "message":
                "Sorry, A house with identifier {} is already exist, please create a new identifier."
                .format(house_id)
            }, 400

        # Single-row frame that is sent to the price model.
        df_predicted = pd.DataFrame(columns=feature_columns)

        # Categorical properties and the values each one may take.
        allowed_values = {
            "Suburb": suburb_list,
            "Street": street_list,
            "Type": type_list,
            "Regionname": region_list,
        }

        print(df.to_string())
        for key in house:
            if str(key) == "Identifier":
                continue
            if key not in house_model.keys():
                # Unexpected feature.
                return {
                    "message": "Property {} is invalid.".format(str(key))
                }, 400
            if key in allowed_values and house[key] not in allowed_values[key]:
                return {
                    "message":
                    "Property {}'s value is invalid".format(str(key))
                }, 400

            df_predicted.loc[0, key] = house[key]
            df.loc[house_id, key] = house[key]

        print(df.to_string())

        # NOTE(review): the original author suspected the input/output table
        # types of the machine-learning call may be inconsistent - confirm.
        df1 = ml.data_processing(df_predicted)
        result = ml.machine_learning(df1)  # the code below reads only "Price"

        print("*************result********************")
        print(result.to_string())
        price = result.loc[0, "Price"]

        df.loc[house_id, "Predicted_Price"] = price

        print("##############final df is ##############")
        print(df.to_string())
        df.to_csv('user_houses.csv')

        return {
            "HouseID":
            house_id,
            "price":
            price,
            "message":
            "The house {} information has been posted and predict price is {}".
            format(house_id, price)
        }, 200
# Example no. 4
# 0
    def put(self, id):
        """Update a stored house from the JSON body and re-predict its price.

        The caller is identified by the ``UserID`` request argument and must
        match the ``UserID`` stored for the house.  Every key of the body
        (other than ``Identifier``) must be a field of ``house_model``, and the
        ``Suburb``, ``Street``, ``Type`` and ``Regionname`` values must appear
        in their module-level allow-lists.  An "Update house Info" entry is
        appended to ``log_file.csv`` and, on success, the updated house and its
        new ``Predicted_Price`` are written back to ``user_houses.csv``.

        Args:
            id: Identifier of the house to update (index of ``user_houses.csv``).

        Returns:
            A ``(body, status)`` tuple: 200 with a message on success, 400 with
            a ``message`` when the request is malformed or a property is
            rejected.  Unknown houses and foreign users are reported through
            ``api.abort`` with 404 and 403 respectively.
        """
        feature_columns = [
            "Identifier", "UserID", "Distance", "Bedroom", "Bathroom", "Car",
            "Landsize", "BuildingArea", "YearBuilt", "Lattitude", "Longtitude",
            "Suburb", "Street", "Type", "Regionname"
        ]

        df = pd.read_csv("user_houses.csv",
                         usecols=feature_columns + ["Predicted_Price"])
        df.set_index(["Identifier"], inplace=True)
        print(df.to_string())
        print(df.index)

        args = parser.parse_args()

        # NOTE(review): the original author questioned whether user/password
        # are still needed as query parameters, since the expected model
        # already carries them.
        try:
            user = float(args.get("UserID"))
        except (TypeError, ValueError):
            # Previously a missing or non-numeric UserID raised here and
            # surfaced as HTTP 500.
            return {
                "message": "UserID is missing or is not a number."
            }, 400

        print(id)
        print(df.index)
        if id not in df.index:
            print("************************************************")
            api.abort(404, "House with id {} is not exist.".format(id))

        house = request.json
        if not isinstance(house, dict):
            # Without this guard a missing/non-object body crashed further
            # down with a TypeError (HTTP 500).
            return {"message": "Request body must be a JSON object."}, 400

        # Append an "Update house Info" entry to the operation log.
        # NOTE(review): the entry records the UserID stored for the house, not
        # the caller's, and is written before the permission check - confirm
        # that this is the intended audit behaviour.
        df_log = pd.read_csv("log_file.csv",
                             usecols=["ID", "UserID", "Operation", "Time"])
        df_log.set_index(["ID"], inplace=True)
        print(df_log.to_string())
        get_time = time.asctime(time.localtime(time.time()))
        log_id = 0 if df_log.empty else df_log.index[-1] + 1
        df_log.loc[log_id, "UserID"] = int(df.loc[id, "UserID"])
        df_log.loc[log_id, "Operation"] = "Update house Info"
        df_log.loc[log_id, "Time"] = get_time
        print(df_log.to_string())
        df_log.to_csv("log_file.csv")

        # Only the user stored for the house may modify it.
        print("################User ID ###############")
        print(df.loc[id, "UserID"])
        if df.loc[id, "UserID"] != user:
            api.abort(
                403,
                "message: Sorry, you has no right to access house {}".format(
                    id))

        print(df.to_string())
        if "Identifier" in house and id != house["Identifier"]:
            return {"message": "House Identifier cannot be changed."}, 400

        # Single-row frame that is sent to the price model.
        df_predicted = pd.DataFrame(columns=feature_columns)

        # Categorical properties and the values each one may take.
        allowed_values = {
            "Suburb": suburb_list,
            "Street": street_list,
            "Type": type_list,
            "Regionname": region_list,
        }

        # Validate and apply the new property values.
        for key in house:
            if str(key) == "Identifier":
                continue
            if key not in house_model.keys():
                return {
                    "message": "Property {} is invalid.".format(str(key))
                }, 400
            if key in allowed_values and house[key] not in allowed_values[key]:
                if key == "Type":
                    # "Type" has always reported the offending value as well.
                    message = "Property \"{}\" value is invalid :\"{}\" ".format(
                        str(key), house[key])
                else:
                    message = "Property {}'s value is invalid".format(str(key))
                return {"message": message}, 400

            df.loc[id, key] = house[key]
            # "Price" is stored with the house but is not a model input.
            if str(key) != "Price":
                df_predicted.loc[0, key] = house[key]

        print(df.to_string())

        # Re-run the price model on the submitted values only.
        print("##############df_predicted#############")
        print(df_predicted.to_string())
        df1 = ml.data_processing(df_predicted)
        result = ml.machine_learning(df1)  # the code below reads only "Price"
        print("#############reuslt####################")
        print(result.to_string())
        price = result.loc[0, "Price"]
        df.loc[id, "Predicted_Price"] = price
        print("############final df#############")
        print(df.to_string())
        df.to_csv('user_houses.csv')

        return {
            "message":
            "House {} information has been updated and predict price {}".
            format(id, price)
        }, 200
# Example no. 5
# 0
def page3():
    """Serve the machine-learning page and run the requested pipeline.

    On a GET request, or a POST without a ``visual_button`` form field, the
    function renders ``machineLearning.html`` with the available train/test
    files and stored embedding/machine models.

    On a POST with ``visual_button`` the field is decoded as JSON and drives
    the pipeline:

    * embedding: a stored model (``is_pre_embed`` present) or a fresh one
      configured by ``embed_value``;
    * dimension reduction, always;
    * classification: a stored model (``is_pre_machine`` present), a fresh one
      when ``machine_value`` is not ``[]``, or none at all.

    Without classification the embedding visualisation is rendered; otherwise
    the classification results are exported as CSV files and the combined
    visualisation is rendered.  Unknown file names and unsupported option
    combinations are answered with HTTP 400.
    """
    train_file_list = os.listdir(train_file_path)
    test_file_list = os.listdir(test_file_path)
    embed_model_list = os.listdir(embed_model_path)
    machine_model_list = os.listdir(machine_model_path)

    def render_form():
        return render_template('machineLearning.html',
                               train_file_list=train_file_list,
                               test_file_list=test_file_list,
                               embed_model_list=embed_model_list,
                               machine_model_list=machine_model_list)

    if request.method != "POST":
        return render_form()

    # Only the "visualise" button carries a pipeline request.
    raw_request = request.form.get("visual_button")
    if not raw_request:
        return render_form()

    response_data = json.loads(raw_request)
    print(response_data)

    train_name = response_data['trainData']
    test_name = response_data['testData']
    # The names come from the client and are concatenated into file paths, so
    # only files that are actually listed in the data directories are accepted.
    # NOTE(review): assumes the form only ever offers names taken from these
    # listings - confirm against the template.
    if train_name not in train_file_list or test_name not in test_file_list:
        return "Unknown train or test file.", 400

    # Read the data, then drop incomplete rows and shuffle.
    train = pd.read_csv(train_file_path + train_name)
    test = pd.read_csv(test_file_path + test_name)
    train = _drop_missing_and_shuffle(train)
    test = _drop_missing_and_shuffle(test)

    use_pre_embed = 'is_pre_embed' in response_data
    use_pre_machine = 'is_pre_machine' in response_data
    embed_value = response_data.get('embed_value', [])
    machine_value = response_data.get('machine_value', [])
    train_machine = not use_pre_machine and machine_value != []

    # Combinations the original branches never handled used to fall through
    # and crash with a NameError further down; reject them explicitly.
    if use_pre_machine and not use_pre_embed:
        return "Unsupported combination of embedding and machine options.", 400
    if use_pre_embed and not use_pre_machine and embed_value != []:
        return "Unsupported combination of embedding and machine options.", 400

    if use_pre_machine:
        machine_label = 'pre-machine'
    elif train_machine:
        machine_label = 'first-machine'
    else:
        machine_label = 'no-machine'
    print('{}, {}'.format('pre-embed' if use_pre_embed else 'first-embed',
                          machine_label))

    # Collect every parameter before the expensive steps start.
    embed_type = response_data['embed_type']
    if use_pre_embed:
        pre_embed_model = response_data['pre_embed_model']
    else:
        embed_params = get_embed_params(embed_type,
                                        response_data['embed_value'])

    if use_pre_machine:
        machine_type = response_data['machine_type']
    elif train_machine:
        machine_type = response_data['machine_type']
        machine_params = get_machine_params(machine_type, machine_value)

    # Embedding.
    if use_pre_embed:
        X_train, X_test, y_train, y_test = pre_train_embedding(
            embed_type, pre_embed_model, train, test)
    else:
        X_train, X_test, y_train, y_test = embedding(
            train_name.split(".")[0], embed_type, train, test, embed_params)

    # Dimension reduction.
    dimension_type = response_data['dimension_type']
    dimension_reduction(dimension_type, X_train, X_test, y_train, y_test)

    if not (use_pre_machine or train_machine):
        return render_template('visualization.html',
                               visualization="embedding_and_visualization")

    # Machine learning.
    if use_pre_machine:
        train_y_pred, test_y_pred = pre_train_machine_learning(
            embed_type, machine_type, X_train, X_test, y_train, y_test)
    else:
        train_y_pred, test_y_pred = machine_learning(
            embed_type, machine_type, X_train, X_test, y_train, y_test,
            machine_params)

    _export_machine_learning_results(y_train, y_test, train_y_pred,
                                     test_y_pred)

    return render_template(
        'visualization.html',
        visualization="embedding_and_machineLearning_visualization")


def _drop_missing_and_shuffle(frame):
    """Drop incomplete rows if ``x`` or ``y`` has missing values, then shuffle."""
    if pd.isnull(frame['x']).sum() > 0 or pd.isnull(frame['y']).sum() > 0:
        frame = frame.dropna()
    return frame.sample(frac=1).reset_index(drop=True)


def _export_machine_learning_results(
        y_train,
        y_test,
        train_y_pred,
        test_y_pred,
        output_dir=r'/home/ubuntu/project2/csv_files/'):
    """Write confusion matrices, metric scores and per-sample results as CSV.

    For both the train and the test split the function writes
    ``confusion_matrix_<split>.csv`` and ``metrics_score_<split>.csv``, then
    reads ``embedding_and_visualization_<split>.csv``, adds the predictions and
    a success/failure column and writes the result to
    ``embedding_and_machinelearning_visualization_<split>.csv``.
    """
    from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

    metric_names = ['accuracy', 'precision', 'recall', 'f1']
    splits = (('train', y_train, train_y_pred), ('test', y_test, test_y_pred))

    # The label order is fixed explicitly and passed to confusion_matrix so
    # that the row/column names always line up with the matrix; an unordered
    # set() need not match the sorted order sklearn uses by default.
    labels = sorted(set(y_train))
    for split, y_true, y_pred in splits:
        matrix = confusion_matrix(y_true, y_pred, labels=labels)
        pd.DataFrame(matrix, index=labels, columns=labels).to_csv(
            output_dir + 'confusion_matrix_{}.csv'.format(split), index=False)

    # Classification metrics (macro-averaged where averaging applies).
    scores = {}
    for split, y_true, y_pred in splits:
        scores[split] = [
            accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred, average='macro'),
            recall_score(y_true, y_pred, average='macro'),
            f1_score(y_true, y_pred, average='macro'),
        ]

    for position, metric in enumerate(metric_names):
        print('train {0}: {1}, test {0}: {2}'.format(
            metric, scores['train'][position], scores['test'][position]))

    for split, _, _ in splits:
        score_df = pd.DataFrame({
            'Metrics': metric_names,
            'Score': [round(value, 2) for value in scores[split]],
        })
        score_df.to_csv(output_dir + 'metrics_score_{}.csv'.format(split),
                        index=False)

    # Attach predictions and a success/failure flag to the embedding export.
    success_mapping_table = {0: "실패", 1: "성공"}
    for split, _, y_pred in splits:
        result_df = pd.read_csv(
            output_dir + 'embedding_and_visualization_{}.csv'.format(split))
        result_df['pred'] = y_pred
        matches = (result_df['pred'] == result_df['target']).astype(int)
        result_df['success'] = matches.map(success_mapping_table)
        result_df.to_csv(
            output_dir +
            'embedding_and_machinelearning_visualization_{}.csv'.format(split),
            index=False)