def fetch_max_num_of_middle_steps(physical_quantity='isotope'):
    """
    Return the largest middle-step count found across all files.

    Every file returned by ``fetch_files_by_name(filenames='all')`` is loaded
    with all of its step columns for the chosen physical quantity; the widest
    resulting DataFrame determines the answer.

    Parameters
    ----------
    physical_quantity : str or PhysicalQuantity, default = 'isotope'
        Physical quantity to inspect, given by name or as an object.  When
        ``type_checker`` reports ``'str'`` the name is resolved through
        ``fetch_physical_quantities_by_name`` and one result is taken.

    Returns
    -------
    int
        The maximum column count minus 4.  The 4 presumably accounts for the
        ``nuc_ix``, ``name``, ``first_step`` and ``last_step`` columns, so
        the result is negative when no frame has at least 4 columns.
    """
    all_files = fetch_files_by_name(filenames='all')

    if type_checker(physical_quantity, PhysicalQuantity) == 'str':
        resolved = fetch_physical_quantities_by_name(physical_quantity)
        physical_quantity = resolved.pop()

    # Column count of the all-steps frame of each file, evaluated lazily.
    column_counts = (
        len(
            fetch_data_by_filename_and_physical_quantity(
                one_file, physical_quantity, True).columns)
        for one_file in all_files)

    # default=0 covers the case where there are no files at all.
    widest = max(column_counts, default=0)

    return widest - 4
def fetch_data_by_filename_and_physical_quantities(filename,
                                                   physical_quantities,
                                                   is_all_step=False):
    """
    Load the data of one file for several physical quantities.

    Each physical quantity is delegated to
    ``fetch_data_by_filename_and_physical_quantity``.

    Parameters
    ----------
    filename : File or str
        File object; when ``type_checker`` reports ``'str'`` it is resolved
        through ``fetch_files_by_name`` and one result is taken.
    physical_quantities : list[str] or str or list[PhysicalQuantity] or PhysicalQuantity
        Physical quantities, by name or as objects.  Names are resolved
        through ``fetch_physical_quantities_by_name``.
    is_all_step : bool, default False
        Whether to also read the intermediate (middle step) result columns.

    Returns
    -------
    dict[str, pd.DataFrame]
        Maps each physical quantity name to its DataFrame.
    """
    if type_checker(filename, File) == 'str':
        matching_files = fetch_files_by_name(filename)
        filename = matching_files.pop()

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    frames_by_quantity = {}
    for quantity in physical_quantities:
        frame = fetch_data_by_filename_and_physical_quantity(
            filename, quantity, is_all_step)
        frames_by_quantity[quantity.name] = frame

    return frames_by_quantity
# Example #3
# 0
def save_extracted_data_to_exel(nuc_data_id,
                                filenames=None,
                                physical_quantities=None,
                                is_all_step=False,
                                result_path=Path('.'),
                                merge=True):
    """
    Write extracted data of the given files to xlsx workbooks.

    For every physical quantity the extracted data of each file is fetched
    with ``fetch_extracted_data_by_filename_and_physical_quantity`` and handed
    to ``save_to_excel`` under the physical quantity's name.

    Parameters
    ----------
    nuc_data_id : list[int]
        NucData ids to restrict the extraction to.
    filenames : list[File or str] or File or str
        Files to export.  NOTE(review): the original documentation states
        that ``None`` selects every file; that handling is not visible in
        this function -- confirm against ``type_checker`` /
        ``fetch_files_by_name``.
    physical_quantities : list[str or PhysicalQuantity] or str or PhysicalQuantity
        Physical quantities, by name or as objects.
    is_all_step : bool, default = False
        Whether to also export the intermediate result columns.  When True
        the workbook names get an ``all_steps_`` prefix.
    result_path : Path or str
        Output directory.
    merge : bool, default = True
        True: merge all files into one workbook (``final.xlsx``).
        False: write one workbook per file (``<file name>.xlsx``).

    Returns
    -------
    None
    """
    if type_checker(filenames, File) == 'str':
        filenames = fetch_files_by_name(filenames)

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    output_dir = Path(result_path)
    step_prefix = 'all_steps_' if is_all_step else ''
    merged_workbook = f'{step_prefix}final.xlsx'

    def _workbook_for(file):
        # Name of the per-file workbook used when merge is False.
        return f'{step_prefix}{file.name}.xlsx'

    # Remove stale workbooks up front; save_to_excel is called once per
    # physical quantity on the same workbook name, so presumably it adds to
    # an existing workbook rather than overwriting it.
    if merge:
        output_dir.joinpath(merged_workbook).unlink(missing_ok=True)
    else:
        # No `del filename` after this loop: it raised UnboundLocalError
        # whenever `filenames` was empty.
        for filename in filenames:
            output_dir.joinpath(_workbook_for(filename)).unlink(
                missing_ok=True)

    for physical_quantity in physical_quantities:
        df_left = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

        for filename in filenames:
            df_right = fetch_extracted_data_by_filename_and_physical_quantity(
                nuc_data_id, filename, physical_quantity, is_all_step)

            if not df_right.empty:
                df_left = pd.merge(df_left,
                                   df_right,
                                   how='outer',
                                   on=['nuc_ix', 'name'])

            if not merge:
                # Per-file mode: flush this file and start a fresh frame.
                save_to_excel({physical_quantity.name: df_left},
                              _workbook_for(filename), result_path)
                df_left = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

        if merge:
            save_to_excel({physical_quantity.name: df_left}, merged_workbook,
                          result_path)
def prediction(filenames,
               physical_quantity='isotope',
               is_all_step=False,
               model_type='iforest',
               model=None,
               fraction=0.01):
    """
    Run a model over the merged data of several files and return anomalies.

    The data of every file is fetched for one physical quantity, its value
    columns are prefixed with the file name and cast to float64, and all
    files are outer-merged on ``nuc_ix`` / ``name``.  The merged frame is
    passed to ``predict_model``; only rows whose ``Anomaly`` column equals 1
    are returned.

    Parameters
    ----------
    filenames : list[File or str] or File or str
        Files to analyse.
    physical_quantity : str or PhysicalQuantity, default = 'isotope'
        Physical quantity to analyse, by name or as an object.
    is_all_step : bool, default = False
        Whether to also read the intermediate result columns.
    model_type : str
        When not None, a new model is trained with ``train_model`` on the
        merged data and replaces ``model``.
    model
        Model used for prediction when ``model_type`` is None.
    fraction : float
        Passed to ``train_model``.

    Returns
    -------
    pd.DataFrame
        The rows flagged as anomalies, without the ``Anomaly`` column.
    """
    if type_checker(filenames, File) == 'str':
        filenames = fetch_files_by_name(filenames)

    if type_checker(physical_quantity, PhysicalQuantity) == 'str':
        physical_quantity = fetch_physical_quantities_by_name(
            physical_quantity).pop()

    key_columns = ['nuc_ix', 'name']
    combined = pd.DataFrame(columns=['nuc_ix', 'name'])

    for file_obj in filenames:
        per_file = fetch_data_by_filename_and_physical_quantity(
            file_obj, physical_quantity, is_all_step)

        if per_file.empty:
            continue

        tag = file_obj.name

        # First pass: prefix the two fixed step columns.
        per_file.rename(columns={
            'first_step': f'{tag}_first_step',
            'last_step': f'{tag}_last_step'
        },
                        inplace=True)

        # Second pass: prefix every column whose (already updated) name
        # mentions 'middle_step'.
        middle_renames = {}
        for column in per_file.columns.tolist():
            if 'middle_step' in column:
                middle_renames[column] = f'{tag}_{column}'
        per_file.rename(columns=middle_renames, inplace=True)

        value_columns = [
            column for column in per_file.columns.tolist()
            if column not in key_columns
        ]
        per_file[value_columns] = per_file[value_columns].astype('float64',
                                                                 copy=False)

        combined = pd.merge(combined,
                            per_file,
                            how='outer',
                            on=['nuc_ix', 'name'])

    if model_type is not None:
        model = train_model(nuc_data=combined,
                            model_type=model_type,
                            fraction=fraction)

    scored = predict_model(model, data=combined)
    anomalies = scored[scored['Anomaly'] == 1]

    return anomalies.drop(columns='Anomaly')
def save_prediction_to_exel(filenames,
                            result_path,
                            physical_quantities='isotope',
                            is_all_step=False,
                            merge=True,
                            model_type=None,
                            model_name=None,
                            fraction=0.001):
    """
    Run anomaly prediction and write the flagged rows to xlsx workbooks.

    Results are written below ``<result_path>/anomaly_detection_result`` via
    ``save_to_excel``, under the name of each physical quantity.

    Parameters
    ----------
    filenames : list[File or str] or File or str
        Files to analyse.
    result_path : Path or str
        Base output directory.
    physical_quantities : list[str or PhysicalQuantity] or str or PhysicalQuantity
        Physical quantities, by name or as objects.
    is_all_step : bool, default = False
        Whether to also read the intermediate result columns.  When True the
        workbook names contain ``all_steps_``.
    merge : bool, default = True
        True: predict once over all files and write one workbook.
        False: predict each file on its own and write one workbook per file.
    model_type : str
        When given, ``prediction`` trains a new model of this type.  When
        None, the model is loaded with ``load_model(model_name)``.
    model_name : str
        Name handed to ``load_model`` when ``model_type`` is None.
    fraction
        Passed through to ``prediction``.

    Returns
    -------
    None
    """
    if type_checker(filenames, File) == 'str':
        filenames = fetch_files_by_name(filenames)

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    model = load_model(model_name) if model_type is None else None

    result_path = Path(result_path).joinpath('anomaly_detection_result')

    # NOTE(review): with a loaded model (model_type is None) the prefix is
    # the text 'None'; kept unchanged so existing output names stay stable.
    prefix = model_type
    step_tag = 'all_steps_' if is_all_step else ''
    merged_workbook = f'{prefix}_{step_tag}final.xlsx'

    def _workbook_for(file):
        # Name of the per-file workbook used when merge is False.
        return f'{prefix}_{step_tag}{file.name}.xlsx'

    # Remove stale workbooks up front; save_to_excel is called once per
    # physical quantity on the same workbook name.
    if merge:
        result_path.joinpath(merged_workbook).unlink(missing_ok=True)
    else:
        # No `del filename` after this loop: it raised UnboundLocalError
        # whenever `filenames` was empty.
        for filename in filenames:
            result_path.joinpath(_workbook_for(filename)).unlink(
                missing_ok=True)

    for physical_quantity in physical_quantities:

        if merge:
            df_result = prediction(filenames=filenames,
                                   physical_quantity=physical_quantity,
                                   is_all_step=is_all_step,
                                   model_type=model_type,
                                   model=model,
                                   fraction=fraction)

            df_result.dropna(axis=1, how='all', inplace=True)
            save_to_excel({physical_quantity.name: df_result},
                          merged_workbook, result_path)
            continue

        for filename in filenames:
            # Predict on this file only.  Passing the whole `filenames` list
            # here wrote the same all-file result into every per-file
            # workbook.
            df_right = prediction(filenames=[filename],
                                  physical_quantity=physical_quantity,
                                  is_all_step=is_all_step,
                                  model_type=model_type,
                                  model=model,
                                  fraction=fraction)

            df_left = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

            if not df_right.empty:
                df_right = df_right.rename(columns={
                    'Anomaly_Score': f'{filename.name}_Anomaly_Score'
                })
                df_right = df_right.dropna(axis=1, how='all')
                df_left = pd.merge(df_left,
                                   df_right,
                                   how='outer',
                                   on=['nuc_ix', 'name'])

            # Written even when nothing was flagged, so the sheet exists.
            save_to_excel({physical_quantity.name: df_left},
                          _workbook_for(filename), result_path)
# Example #6
# 0
def save_comparison_result_to_excel(nuc_data_id,
                                    reference_file,
                                    comparison_files,
                                    result_path,
                                    physical_quantities='isotope',
                                    deviation_mode='relative',
                                    threshold=Decimal('1.0E-12'),
                                    is_all_step=False):
    """
    Compare a reference file with each comparison file and save the results.

    For every comparison file, ``calculate_comparative_result`` is called and
    its result dictionary is written with ``save_to_excel`` to
    ``<result_path>/comparative_result``.  An existing workbook of the same
    name is removed first.

    Parameters
    ----------
    nuc_data_id : list[int]
        NucData ids to restrict the comparison to.
    reference_file : File or str
        Reference file.
    comparison_files : list[str or File] or File or str
        Files compared against the reference, one by one.
    result_path : Path or str
        Base output directory.
    physical_quantities : list[str or PhysicalQuantity] or str or PhysicalQuantity, default = 'isotope'
        Physical quantities to compare, by name or as objects.
    deviation_mode : str, default = 'relative'
        ``'absolute'`` or ``'relative'``.
    threshold : Decimal, default = Decimal('1.0E-12')
        Deviation threshold.
    is_all_step : bool, default = False
        Whether to also read the intermediate result columns.  When True the
        workbook name gets an ``all_step_`` prefix.

    Returns
    -------
    None
    """
    if type_checker(reference_file, File) == 'str':
        # fetch_files_by_name returns a collection; take the single File so
        # that `.name` below works.  The missing .pop() made the name lookup
        # fail for references given by name.
        reference_file = fetch_files_by_name(reference_file).pop()

    if type_checker(comparison_files, File) == 'str':
        comparison_files = fetch_files_by_name(comparison_files)

    output_dir = Path(result_path).joinpath('comparative_result')

    for comparison_file in comparison_files:
        # Progress output: which pair is being compared.
        print((reference_file.name, comparison_file.name))

        dict_df_all = calculate_comparative_result(
            nuc_data_id=nuc_data_id,
            reference_file=reference_file,
            comparison_file=comparison_file,
            physical_quantities=physical_quantities,
            deviation_mode=deviation_mode,
            threshold=threshold,
            is_all_step=is_all_step)

        file_name = f'{deviation_mode}_{threshold}_{reference_file.name}_vs_{comparison_file.name}.xlsx'

        if is_all_step:
            file_name = f'all_step_{file_name}'

        # Drop any previous result so the workbook is rebuilt from scratch.
        output_dir.joinpath(file_name).unlink(missing_ok=True)
        save_to_excel(dict_df_all, file_name, output_dir)
# Example #7
# 0
def calculate_comparative_result(nuc_data_id,
                                 reference_file,
                                 comparison_file,
                                 physical_quantities='isotope',
                                 deviation_mode='relative',
                                 threshold=Decimal('1.0E-12'),
                                 is_all_step=False):
    """
    Compare one reference file with one comparison file.

    For each physical quantity the extracted data of both files is fetched,
    aligned with ``_complement_columns``, the deviation is computed with
    ``_calculate_deviation`` and everything is combined with
    ``_merge_reference_comparison_and_deviation``.  Physical quantities for
    which either file has no data are skipped.

    Parameters
    ----------
    nuc_data_id : list[int]
        NucData ids to restrict the comparison to.
    reference_file : File or str
        Reference file.
    comparison_file : File or str
        File compared against the reference.
    physical_quantities : list[str or PhysicalQuantity] or str or PhysicalQuantity, default = 'isotope'
        Physical quantities to compare, by name or as objects.
    deviation_mode : str, default = 'relative'
        ``'absolute'`` or ``'relative'``.
    threshold : Decimal, default = Decimal('1.0E-12')
        Deviation threshold; converted with ``Decimal(threshold)`` before
        use.
    is_all_step : bool, default = False
        Whether to also read the intermediate result columns.

    Returns
    -------
    dict[str, pd.DataFrame]
        Maps each compared physical quantity name to its result frame.
    """
    if type_checker([reference_file, comparison_file], File) == 'str':
        reference_file = fetch_files_by_name(reference_file).pop()
        comparison_file = fetch_files_by_name(comparison_file).pop()

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    results = {}

    for quantity in physical_quantities:
        ref_frame = fetch_extracted_data_by_filename_and_physical_quantity(
            nuc_data_id, reference_file, quantity, is_all_step)
        cmp_frame = fetch_extracted_data_by_filename_and_physical_quantity(
            nuc_data_id, comparison_file, quantity, is_all_step)

        # Only compare when both sides actually have data.
        if not (ref_frame.empty or cmp_frame.empty):
            ref_frame, cmp_frame = _complement_columns(
                ref_frame, cmp_frame, reference_file.name,
                comparison_file.name)

            deviation, kept_index = _calculate_deviation(
                ref_frame, cmp_frame, deviation_mode, Decimal(threshold))

            merged = _merge_reference_comparison_and_deviation(
                ref_frame, cmp_frame, deviation, kept_index)
            results[quantity.name] = merged

    return results
def fetch_transposed_data_by_filename_and_physical_quantity(
        filename, physical_quantity, is_all_step=False):
    """
    Fetch the data of one file / physical quantity and return it transposed.

    The data is read from the Nuc, NucData and PhysicalQuantity tables,
    sorted by ``nuc_ix`` and transposed so that each step becomes a row and
    each nuclide name becomes a column.  A ``time_interval`` column is
    appended.

    Parameters
    ----------
    filename : File or str
        File object; a name is resolved through ``fetch_files_by_name``.
    physical_quantity : str or PhysicalQuantity
        Physical quantity, by name or as an object.
    is_all_step : bool, default False
        Whether to also read the intermediate result columns; by default
        only the first and last step are read.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(nuc_ix, data)``: ``nuc_ix`` holds the ``nuc_ix`` and ``name``
        columns of the sorted data; ``data`` is the transposed float64 frame
        with the extra ``time_interval`` column.
    """

    if type_checker(filename, File) == 'str':
        filename = fetch_files_by_name(filename).pop()

    if type_checker(physical_quantity, PhysicalQuantity) == 'str':
        physical_quantity = fetch_physical_quantities_by_name(
            physical_quantity).pop()

    df_left = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

    file_id = filename.id
    physical_quantity_id = physical_quantity.id

    if not is_all_step:
        # Intermediate results are not wanted, so NucData.middle_steps is not
        # selected; the else branch selects it.
        stmt = lambda_stmt(lambda: select(Nuc.nuc_ix, Nuc.name, NucData.
                                          first_step, NucData.last_step))
    else:
        stmt = lambda_stmt(
            lambda: select(Nuc.nuc_ix, Nuc.name, NucData.first_step, NucData.
                           last_step, NucData.middle_steps))

    # Join the lookup tables and restrict to this file and physical quantity.
    stmt += lambda s: s.join(Nuc, Nuc.id == NucData.nuc_id)
    stmt += lambda s: s.join(
        PhysicalQuantity, PhysicalQuantity.id == NucData.physical_quantity_id)
    stmt += lambda s: s.where(NucData.file_id == file_id, PhysicalQuantity.id
                              == physical_quantity_id)

    with Session() as session:
        column_names = [column.name for column in list(stmt.selected_columns)]
        df_right = pd.DataFrame(data=session.execute(stmt).all(),
                                columns=column_names)

    if is_all_step:
        # Replace the raw middle_steps column by the columns parsed from it.
        exclude_middle_steps = df_right.drop(columns='middle_steps', axis=1)
        del column_names[-1]
        exclude_middle_steps.columns = column_names

        # NOTE(review): rows whose middle_steps is None are skipped here, so
        # the parsed frame can have fewer rows than exclude_middle_steps;
        # the positional concat below presumably relies on there being no
        # None values -- confirm.
        middle_steps = pd.DataFrame([
            middle_steps_line_parsing(middle_steps)
            for middle_steps in df_right['middle_steps']
            if middle_steps is not None
        ])

        df_right = pd.concat([exclude_middle_steps, middle_steps],
                             axis=1,
                             copy=False)

    if not df_right.empty:
        df_left = pd.merge(df_left,
                           df_right,
                           how='outer',
                           on=['nuc_ix', 'name'])

    df_left.sort_values(by=['nuc_ix'], inplace=True)

    # Keep the identifying columns before they are dropped from the data.
    nuc_ix = df_left.loc[:, ['nuc_ix', 'name']]

    # Transpose: steps become rows, nuclide names become column labels.
    df_left = df_left.T
    df_left.columns = df_left.loc['name']
    df_left.drop(['nuc_ix', 'name'], inplace=True)
    df_left = df_left.astype('float64', copy=False)
    # Move the row at position 1 to the end.  Given the select order
    # (first_step, last_step, then middle steps) this is presumably the
    # last_step row, giving chronological order -- TODO confirm.
    reindex = df_left.index.tolist()
    reindex.append(reindex.pop(1))
    df_left = df_left.reindex(reindex, copy=False)

    # One time value per row: time_interval * 0 .. time_interval *
    # repeat_times, divided by one day.  Assumes filename.time_interval is
    # timedelta-like and the row count equals repeat_times + 1 -- TODO
    # confirm.
    time_interval = pd.Series(
        (filename.time_interval * i for i in range(filename.repeat_times + 1)),
        name='time_interval',
        index=df_left.index)
    time_interval = time_interval / pd.to_timedelta(1, unit='D')

    df_left = pd.concat([df_left, time_interval], axis=1)

    return nuc_ix, df_left
def fetch_extracted_data_by_filename_and_physical_quantity(
        nuc_data_id, filename, physical_quantity, is_all_step=False):
    """
    Fetch the extracted data of one file for one physical quantity.

    Only NucData rows whose id is in ``nuc_data_id`` are read.  All value
    columns are prefixed with the file name.

    Parameters
    ----------
    nuc_data_id : list[int]
        NucData ids to restrict the query to.
    filename : str or File
        File object; a name is resolved through ``fetch_files_by_name``.
    physical_quantity : str or PhysicalQuantity
        Physical quantity, by name or as an object.
    is_all_step : bool, default = False
        Whether to also read the intermediate result columns; by default
        only the last step column is read.

    Returns
    -------
    pd.DataFrame
        Columns ``nuc_ix``, ``name``, ``<file name>_last_step`` and, when
        ``is_all_step`` is True, the parsed middle step columns.  Sorted by
        ``nuc_ix``.
    """
    if type_checker(filename, File) == 'str':
        filename = fetch_files_by_name(filename).pop()

    if type_checker(physical_quantity, PhysicalQuantity) == 'str':
        physical_quantity = fetch_physical_quantities_by_name(
            physical_quantity).pop()

    result = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

    # Plain local values captured by the statement lambdas below.
    physical_quantity_id = physical_quantity.id
    file_id = filename.id

    if is_all_step:
        # Intermediate results requested: also select NucData.middle_steps.
        stmt = lambda_stmt(
            lambda: select(Nuc.nuc_ix, Nuc.name, NucData.last_step, NucData.
                           middle_steps).where(NucData.id.in_(nuc_data_id)))
    else:
        stmt = lambda_stmt(
            lambda: select(Nuc.nuc_ix, Nuc.name, NucData.last_step).where(
                NucData.id.in_(nuc_data_id)))

    stmt += lambda s: s.join(Nuc, Nuc.id == NucData.nuc_id)
    stmt += lambda s: s.join(
        PhysicalQuantity, PhysicalQuantity.id == NucData.physical_quantity_id)
    stmt += lambda s: s.where(NucData.file_id == file_id, PhysicalQuantity.id
                              == physical_quantity_id)

    last_step_label = f'{filename.name}_last_step'

    with Session() as session:
        rows = session.execute(stmt).all()

        if is_all_step:
            raw = pd.DataFrame(
                data=rows,
                columns=['nuc_ix', 'name', last_step_label, 'middle_steps'])

            # Expand the raw middle_steps values into separate columns and
            # prefix them with the file name.
            parsed = pd.DataFrame([
                middle_steps_line_parsing(item)
                for item in raw['middle_steps'] if item is not None
            ])
            parsed.columns = [
                f'{filename.name}_{label}'
                for label in parsed.columns.tolist()
            ]

            df_right = pd.concat(
                [raw.drop(columns='middle_steps'), parsed],
                axis=1,
                copy=False)
        else:
            df_right = pd.DataFrame(
                data=rows, columns=['nuc_ix', 'name', last_step_label])

    if not df_right.empty:
        result = pd.merge(result,
                          df_right,
                          how='outer',
                          on=['nuc_ix', 'name'])

    return result.sort_values(by=['nuc_ix'])
def fetch_extracted_data_id(filenames=None,
                            physical_quantities='all',
                            nuclide_list=None):
    """
    Collect the NucData ids selected by files, quantities and nuclides.

    Parameters
    ----------
    filenames : list[File] or File
        Files to search; a single value is wrapped into a list.
    physical_quantities : list[str or PhysicalQuantity] or str or PhysicalQuantity
        Physical quantities, by name or as objects.
    nuclide_list : list[str] or 'all' or None
        * None: only records whose ``first_step`` or ``last_step`` is
          non-zero.
        * ``'all'``: every record of the file and physical quantities.
        * otherwise: records whose nuclide name is in the list.  In
          addition, all records of a physical quantity named
          ``'gamma_spectra'`` are included regardless of the list; such
          records may therefore be listed twice when they also match the
          name filter.

    Returns
    -------
    list[int]
        The ids, in file order.
    """
    if type_checker(filenames, File) == 'str':
        filenames = fetch_files_by_name(filenames)

    if not isinstance(filenames, list):
        filenames = [filenames]

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    collected_ids = []

    with Session() as session:
        for file_obj in filenames:
            quantity_ids = [quantity.id for quantity in physical_quantities]
            file_id = file_obj.id

            if nuclide_list is None:
                # No nuclide list: drop records whose first and last step
                # are both zero.
                stmt = select(NucData.id).where(
                    NucData.file_id == file_id,
                    NucData.physical_quantity_id.in_(quantity_ids))
                stmt = stmt.where(
                    or_(NucData.first_step != 0, NucData.last_step != 0))
            elif nuclide_list == 'all':
                stmt = select(NucData.id).where(
                    NucData.file_id == file_id,
                    NucData.physical_quantity_id.in_(quantity_ids))
            else:
                # gamma_spectra records are taken unfiltered; everything
                # else is filtered by the nuclide list.
                gamma_ids = (quantity.id for quantity in physical_quantities
                             if quantity.name == 'gamma_spectra')
                for gamma_id in gamma_ids:
                    gamma_stmt = select(NucData.id).where(
                        NucData.file_id == file_id,
                        NucData.physical_quantity_id == gamma_id)
                    collected_ids.extend(
                        session.execute(gamma_stmt).scalars().all())

                stmt = select(NucData.id).join(Nuc, Nuc.id == NucData.nuc_id)
                stmt = stmt.where(
                    NucData.file_id == file_id,
                    NucData.physical_quantity_id.in_(quantity_ids))
                stmt = stmt.where(Nuc.name.in_(nuclide_list))

            collected_ids.extend(session.execute(stmt).scalars().all())

    return collected_ids
def fetch_data_by_filename_and_nuclide_list(filename,
                                            physical_quantities,
                                            nuclide_list,
                                            is_all_step=False):
    """
    Fetch the data of one file per physical quantity, filtered by nuclides.

    The data is read from the Nuc, NucData and PhysicalQuantity tables.

    Parameters
    ----------
    filename : File
        File object.  Unlike the sibling fetch functions, no name-to-File
        resolution is done here; ``filename.id`` is read directly.
    physical_quantities : list[str] or str or list[PhysicalQuantity] or PhysicalQuantity
        Physical quantities, by name or as objects.
    nuclide_list : list[str] or None
        Nuclide names to keep.  When None, records whose ``first_step`` and
        ``last_step`` are both zero are filtered out instead.  The list is
        not applied to the physical quantity named ``'gamma_spectra'``.
    is_all_step : bool, default False
        Whether to also read the intermediate result columns.

    Returns
    -------
    dict[str, pd.DataFrame]
        Maps each physical quantity name to its DataFrame, sorted by
        ``nuc_ix``.
    """
    dict_df_data = {}

    if type_checker(physical_quantities, PhysicalQuantity) == 'str':
        physical_quantities = fetch_physical_quantities_by_name(
            physical_quantities)

    with Session() as session:
        physical_quantity: PhysicalQuantity
        for physical_quantity in physical_quantities:
            # Plain local values captured by the statement lambdas below.
            file_id = filename.id
            physical_quantity_id = physical_quantity.id

            if not is_all_step:
                # Intermediate results are not wanted, so
                # NucData.middle_steps is not selected; the else branch
                # selects it.
                stmt = lambda_stmt(
                    lambda: select(Nuc.nuc_ix, Nuc.name, NucData.first_step,
                                   NucData.last_step))
            else:
                stmt = lambda_stmt(
                    lambda: select(Nuc.nuc_ix, Nuc.name, NucData.first_step,
                                   NucData.last_step, NucData.middle_steps))

            stmt += lambda s: s.join(Nuc, Nuc.id == NucData.nuc_id)
            stmt += lambda s: s.join(
                PhysicalQuantity, PhysicalQuantity.id == NucData.
                physical_quantity_id)
            stmt += lambda s: s.where(
                NucData.file_id == file_id, PhysicalQuantity.id ==
                physical_quantity_id)
            if nuclide_list is None:
                # No nuclide list: filter out records whose first_step and
                # last_step are both 0.
                stmt += lambda s: s.where(
                    or_(NucData.first_step != 0, NucData.last_step != 0))
            else:
                if physical_quantity.name != 'gamma_spectra':
                    # Filter by the nuclide list, except for gamma_spectra,
                    # which is left unfiltered.
                    stmt += lambda s: s.where(Nuc.name.in_(nuclide_list))

            nuc_data = pd.DataFrame(
                data=session.execute(stmt).all(),
                columns=tuple(column.name
                              for column in list(stmt.selected_columns)))

            if is_all_step:
                # Replace the raw middle_steps column by the columns parsed
                # from it.
                nuc_data_exclude_middle_steps = nuc_data.drop(
                    columns='middle_steps', axis=1)
                # NOTE(review): None entries are skipped, so the parsed frame
                # can have fewer rows than the frame it is concatenated with
                # below -- confirm that middle_steps is never None here.
                middle_steps = pd.DataFrame([
                    middle_steps_line_parsing(middle_steps)
                    for middle_steps in nuc_data['middle_steps']
                    if middle_steps is not None
                ])

                del nuc_data
                nuc_data = pd.concat(
                    [nuc_data_exclude_middle_steps, middle_steps],
                    axis=1,
                    copy=False)

            nuc_data.sort_values(by=['nuc_ix'], inplace=True)
            dict_df_data[physical_quantity.name] = nuc_data

    return dict_df_data
def fetch_data_by_filename_and_physical_quantity(filename,
                                                 physical_quantity,
                                                 is_all_step=False):
    """
    Fetch the data of one file for one physical quantity.

    The data is read from the Nuc, NucData and PhysicalQuantity tables.

    Parameters
    ----------
    filename : File or str
        File object; a name is resolved through ``fetch_files_by_name``.
    physical_quantity : str or PhysicalQuantity
        Physical quantity, by name or as an object.
    is_all_step : bool, default False
        Whether to also read the intermediate result columns; by default
        only the first and last step columns are read.

    Returns
    -------
    pd.DataFrame
        Columns ``nuc_ix``, ``name``, ``first_step``, ``last_step`` and, when
        ``is_all_step`` is True, the parsed middle step columns.  Sorted by
        ``nuc_ix``.  When the query yields nothing, only ``nuc_ix`` and
        ``name`` are present.
    """
    if type_checker(filename, File) == 'str':
        filename = fetch_files_by_name(filename).pop()

    if type_checker(physical_quantity, PhysicalQuantity) == 'str':
        physical_quantity = fetch_physical_quantities_by_name(
            physical_quantity).pop()

    result = pd.DataFrame(data=None, columns=['nuc_ix', 'name'])

    # Plain local values captured by the statement lambdas below.
    file_id = filename.id
    physical_quantity_id = physical_quantity.id

    if is_all_step:
        # Intermediate results requested: also select NucData.middle_steps.
        stmt = lambda_stmt(
            lambda: select(Nuc.nuc_ix, Nuc.name, NucData.first_step, NucData.
                           last_step, NucData.middle_steps))
    else:
        stmt = lambda_stmt(lambda: select(Nuc.nuc_ix, Nuc.name, NucData.
                                          first_step, NucData.last_step))

    stmt += lambda s: s.join(Nuc, Nuc.id == NucData.nuc_id)
    stmt += lambda s: s.join(
        PhysicalQuantity, PhysicalQuantity.id == NucData.physical_quantity_id)
    stmt += lambda s: s.where(NucData.file_id == file_id, PhysicalQuantity.id
                              == physical_quantity_id)

    with Session() as session:
        selected_names = [
            column.name for column in list(stmt.selected_columns)
        ]
        rows = session.execute(stmt).all()
        df_right = pd.DataFrame(data=rows, columns=selected_names)

        if is_all_step:
            # Expand the raw middle_steps values into separate columns and
            # put them next to the remaining columns.
            parsed = pd.DataFrame([
                middle_steps_line_parsing(item)
                for item in df_right['middle_steps'] if item is not None
            ])
            without_middle = df_right.drop(columns='middle_steps')

            df_right = pd.concat([without_middle, parsed],
                                 axis=1,
                                 copy=False)

    if not df_right.empty:
        result = pd.merge(result,
                          df_right,
                          how='outer',
                          on=['nuc_ix', 'name'])

    return result.sort_values(by=['nuc_ix'])