Ejemplo n.º 1
0
    def read_alumni_personal_data(self, data: pd.DataFrame, personal_header,
                                  graduated_year):
        """Split a raw alumni sheet into the tables that get inserted.

        The row labelled 0 is skipped, every row whose ID (the column named
        ``personal_header[0]``) occurs more than once is dropped entirely,
        and the source columns are renamed by their position in
        ``personal_header``. Four frames are then derived:

        * ``alumni``: ``alumni_id``, ``gpax`` (float; the Thai
          "not specified" marker becomes -1) and ``graduated_year``.
        * ``alumni_graduated``: ``alumni_id`` and ``branch_id``. A cell is
          replaced by ``str(branch_id)`` when it contains the first
          whitespace-separated word of that branch's ``branch_name``.
        * ``apprentice_table``: ``alumni_id`` and ``apprentice_id``. A cell
          is replaced by ``str(status_id)`` when it contains the
          ``status_title``.
        * ``working_table``: ``alumni_id``, ``status_id``, ``company``,
          ``institution``, ``job_description``, ``faculty``, ``branch`` and
          ``salary`` (float; "not specified" and empty cells become NaN).
          Empty strings in the free-text columns become ``None``.

        Cells that match no database entry keep their original text.

        Args:
            data: Raw sheet. It is copied, never modified.
            personal_header: Source column names, indexed as
                0 alumni id, 1 gpax, 2 branch name, 3 company, 4 status,
                5 job description, 6 salary, 7 institution, 8 faculty,
                9 branch, 10 apprentice.
            graduated_year: Value written to every row of
                ``alumni['graduated_year']``.

        Returns:
            ``inner_res_helper.make_inner_response(True, ...)`` whose value
            maps ``'alumni'``, ``'alumni_graduated'``, ``'working_table'``
            and ``'apprentice_table'`` to ``to_json(orient='index')``
            strings. Any exception (including a non-numeric gpax or salary)
            is printed and turned into the ``False`` / ``"Error"`` response.
        """

        def replace_matching(frame, column, needle, replacement):
            # Literal substring match: titles coming from the database must
            # not be interpreted as regular expressions, and missing cells
            # simply never match instead of poisoning the boolean mask.
            hits = frame[column].str.contains(needle, regex=False, na=False)
            if hits.any():
                frame.loc[hits, [column]] = replacement

        try:
            # Work on a private copy so the in-place calls below operate on
            # an independent frame rather than on a slice of the caller's.
            data = data.loc[1:, :].copy()
            data.drop_duplicates(subset=personal_header[0],
                                 keep=False,
                                 inplace=True)

            data.rename(columns={
                personal_header[0]: 'alumni_id',
                personal_header[1]: 'gpax',
                personal_header[2]: 'branch_name',
                personal_header[3]: 'company',
                personal_header[4]: 'status',
                personal_header[5]: 'job_description',
                personal_header[6]: 'salary',
                personal_header[7]: 'institution',
                personal_header[8]: 'faculty',
                personal_header[9]: 'branch',
                personal_header[10]: 'apprentice',
            },
                        inplace=True)

            # astype returns a new frame; the result has to be kept for the
            # conversion to take effect.
            data = data.astype({'alumni_id': str})

            # alumni table
            alumni = data.loc[:, ['alumni_id', 'gpax']].copy()
            alumni['graduated_year'] = graduated_year
            alumni.loc[alumni['gpax'] == 'ไม่ระบุ', ['gpax']] = -1
            alumni = alumni.astype({'gpax': float})

            # alumni graduated table
            db = DatabaseHelper()
            branches = db.get_branch()['value']
            alumni_graduated = data.loc[:, ['alumni_id', 'branch_name']].copy()

            for entry in branches:
                # Only the first word of the stored branch name is used as
                # the search key.
                keyword = entry['branch_name'].split()[0]
                replace_matching(alumni_graduated, 'branch_name', keyword,
                                 str(entry['branch_id']))

            alumni_graduated.rename(columns={'branch_name': 'branch_id'},
                                    inplace=True)

            # alumni apprentice table
            apprentice_statuses = db.get_apprentice_status_list()['value']
            apprentice_table = data.loc[:, ['alumni_id', 'apprentice']].copy()

            for entry in apprentice_statuses:
                replace_matching(apprentice_table, 'apprentice',
                                 entry['status_title'],
                                 str(entry['status_id']))

            apprentice_table.rename(columns={'apprentice': 'apprentice_id'},
                                    inplace=True)

            # alumni working table
            working_statuses = db.get_working_status_list()['value']
            working_table = data.loc[:, [
                'alumni_id', 'status', 'company', 'institution',
                'job_description', 'faculty', 'branch', 'salary'
            ]].copy()

            for entry in working_statuses:
                replace_matching(working_table, 'status',
                                 entry['status_title'],
                                 str(entry['status_id']))

            working_table.rename(columns={'status': 'status_id'}, inplace=True)

            # Unspecified or blank salaries become NaN so the column can be
            # cast to float.
            unspecified = working_table['salary'].str.contains(
                "ไม่ระบุ", regex=False, na=False)
            working_table.loc[unspecified, ['salary']] = np.nan
            working_table.loc[working_table['salary'] == "",
                              ['salary']] = np.nan
            working_table['salary'] = working_table['salary'].astype(float)

            # Blank free-text cells are stored as missing values.
            for column in ('company', 'institution', 'job_description',
                           'faculty', 'branch'):
                working_table.loc[working_table[column] == "",
                                  [column]] = None

            out_function_data = {
                'alumni': alumni.to_json(orient='index'),
                'alumni_graduated': alumni_graduated.to_json(orient='index'),
                'working_table': working_table.to_json(orient='index'),
                'apprentice_table': apprentice_table.to_json(orient='index')
            }
            return inner_res_helper.make_inner_response(
                True, "Data for insert to data base", out_function_data)
        except Exception as e:
            # Top-level boundary: report the failure to the caller through
            # the response object instead of propagating.
            print(e)
            return inner_res_helper.make_inner_response(
                False, "Error", "Having problem when prepare data.")
Ejemplo n.º 2
0
    def analyze_alumni_work(self, year=None):
        """Summarise alumni counts, salary bands and GPAX per branch.

        Args:
            year: Forwarded unchanged to ``DatabaseHelper.get_all_alumni``.

        Returns:
            The result of ``inner_res_helper.make_inner_response``.

            When the query returns no rows: ``response=False``,
            ``message="Don't have Data"`` and an empty ``value``.

            Otherwise ``response=True`` and ``value`` holds:

            * ``count_student``: number of rows.
            * ``count_by_branch`` / ``count_by_status`` /
              ``count_by_training``: row counts grouped by ``branch_id``,
              ``work_id`` and ``apprentice_id``, re-labelled through the
              matching id-to-name mappings.
            * ``salary_all_branch_training``: dict keyed by ``'all'`` and
              then by each branch id present in the data; every entry has
              ``dept_name``, ``num_student`` and a salary-band by
              apprentice-status table.
            * ``gpax_by_branch``: mean GPAX per branch rounded to two
              decimals, ignoring rows whose GPAX is the -1 sentinel.
        """
        connect = DatabaseHelper()
        data = connect.get_all_alumni(year)

        # Guard clause: nothing to analyse.
        if not data['value']:
            return inner_res_helper.make_inner_response(
                response=False, message="Don't have Data", value={})

        df = pd.DataFrame(data['value'])
        # GPAX is fractional; casting to int would truncate every value
        # before the per-branch mean (rounded to 2 decimals) is computed.
        df['graduated_gpax'] = df['graduated_gpax'].astype(float)

        branch = connect.get_branch()
        branch_data = analyze_helper.set_branch(branch['value'])
        status_working = analyze_helper.set_fullname(
            connect.get_working_status_list())
        status_apprentice = analyze_helper.set_fullname(
            connect.get_apprentice_status_list())

        # id -> display name lookups used to relabel the result tables.
        branch_dic = analyze_helper.set_dict(branch_data.index,
                                             branch_data.branch_name)
        status_working_dic = analyze_helper.set_dict(
            status_working.index, status_working.status_title)
        status_apprentice_dic = analyze_helper.set_dict(
            status_apprentice.index, status_apprentice.status_title)

        count_by_branch = df.groupby('branch_id').size()
        count_by_branch_finish = analyze_helper.check_list(
            branch_data.index.values, count_by_branch)

        count_by_status = df.groupby('work_id').size()
        count_by_status_finish = analyze_helper.check_list(
            status_working.index.values, count_by_status)

        count_by_training = df.groupby('apprentice_id').size()
        count_by_training_finish = analyze_helper.check_list(
            status_apprentice.index.values, count_by_training)

        # -1 marks an unknown GPAX and must not enter the average.
        known_gpax = df[df['graduated_gpax'] != -1]
        gpax_by_branch = known_gpax.groupby(
            'branch_id')['graduated_gpax'].mean().round(2)
        gpax_by_branch_finish = analyze_helper.check_list(
            branch_data.index.values, gpax_by_branch)

        salary_labels = {
            1: 'น้อยกว่า 10,000',
            2: '10,000-19,999',
            3: '20,000-30,000',
            4: 'มากกว่า 30,000'
        }

        def salary_by_training(frame):
            # Build the salary-band by apprentice-status table for ``frame``,
            # normalise its rows/columns through the shared helpers and
            # relabel both axes with display names.
            table = self.__salary_branch_training(
                frame[['salary', 'apprentice_id']])
            table = analyze_helper.check_list_column(
                status_apprentice.index.values, table)
            table = analyze_helper.check_list(salary_labels.keys(), table)
            table = analyze_helper.set_fullname_column(
                status_apprentice_dic, table)
            table = analyze_helper.set_fullname_index(salary_labels, table)
            return table.to_dict('index')

        # Overall table: only rows with a known salary, as integers.
        df_salary = df[df['salary'].notna()].copy()
        df_salary['salary'] = df_salary['salary'].astype(int)

        salary_sections = [{
            'dept_name': 'ทั้งหมด',
            'num_student': len(df),
            'salary_all_branch_training': salary_by_training(df_salary),
        }]

        branch_ids = count_by_branch.index.tolist()
        for branch_id in branch_ids:
            members = df[df['branch_id'] == branch_id]
            if not members.empty:
                # NOTE(review): unlike the overall table, this passes rows
                # with missing salaries and without the int cast - confirm
                # that __salary_branch_training tolerates that.
                salary_table = salary_by_training(members)
                num_student = len(members)
            else:
                # All-zero table with the same labels as a populated one.
                zero_table = pd.DataFrame(
                    0,
                    index=np.arange(len(salary_labels)),
                    columns=status_apprentice.status_title.tolist())
                zero_table['list_salary'] = list(salary_labels.values())
                zero_table.set_index('list_salary', inplace=True)
                salary_table = zero_table.to_dict('index')
                num_student = 0

            salary_sections.append({
                'dept_name': branch_dic[branch_id],
                'num_student': num_student,
                'salary_all_branch_training': salary_table,
            })

        # The first section is the overall one, keyed 'all'.
        section_keys = ['all'] + branch_ids

        value = {
            'count_student':
            len(df.index),
            'count_by_branch':
            analyze_helper.set_fullname_index(
                branch_dic, count_by_branch_finish).to_dict(),
            'count_by_status':
            analyze_helper.set_fullname_index(
                status_working_dic, count_by_status_finish).to_dict(),
            'count_by_training':
            analyze_helper.set_fullname_index(
                status_apprentice_dic, count_by_training_finish).to_dict(),
            'salary_all_branch_training':
            dict(zip(section_keys, salary_sections)),
            'gpax_by_branch':
            analyze_helper.set_fullname_index(
                branch_dic, gpax_by_branch_finish).to_dict(),
        }

        return inner_res_helper.make_inner_response(
            response=True, message="Analyze Successfully", value=value)