Example #1
0
def create_enzyme_df(path_to_file):
    """Build a pandas DataFrame from a gzip-compressed enzyme file.

    The column names are the public (non-underscore) attribute names
    reported by ``dir()`` on a freshly created ``Enzyme.Record()``. Each
    record yielded by ``enzyme.parse`` contributes one row holding the
    values of those attributes, in the same order as the columns.

    input: path_to_file -- path to the ``.gz`` file, opened in text mode.
    output: DataFrame with one row per parsed record.

    NOTE(review): this function refers to both ``Enzyme`` and ``enzyme``;
    confirm that both names are really in scope at module level.
    """
    # Derive the column list from an empty record so that every row is
    # read with exactly the same set of attribute names.
    template = Enzyme.Record()
    columns = []
    for name in dir(template):
        if name.startswith('_'):
            continue
        columns.append(name)

    # 'rt' makes gzip decode the stream to text before it reaches the parser.
    with gzip.open(path_to_file, 'rt') as handle:
        rows = [
            [getattr(entry, name) for name in columns]
            for entry in enzyme.parse(handle)
        ]

    return pd.DataFrame(rows, columns=columns)