Exemples Python de get_wordpieces (language.xsp.data_preprocessing.language_utils.get_wordpieces)

Exemple #1

0

Afficher le fichier

def process_columns(columns, tokenizer, table_name,
                    aligned_schema_entities) -> List[TableColumn]:
    """Converts every column description of a table into a TableColumn.

    Args:
      columns: Iterable of mappings; each one is read through its
        "field name" and "type" keys and is also handed to
        `column_is_foreign_key`.
      tokenizer: Passed through unchanged to `get_wordpieces`.
      table_name: Name of the owning table, stored on every TableColumn.
      aligned_schema_entities: Container queried with `in`; a column counts
        as matching the utterance when its lower-cased name, with
        underscores turned into spaces, is a member.

    Returns:
      One TableColumn per entry of `columns`, in input order.
    """
    numeric_markers = ("int", "float", "double", "decimal")
    text_markers = ("varchar", "longtext")

    result = []
    for spec in columns:
        name = spec["field name"]
        # Underscores become spaces before tokenizing; only the first element
        # of the get_wordpieces result is kept.
        wordpieces = get_wordpieces(name.replace("_", " "), tokenizer)[0]

        # Collapse the declared type into "number" / "text" when it contains
        # one of the known markers; any other type is kept lower-cased.
        kind = spec["type"].lower()
        if any(marker in kind for marker in numeric_markers):
            kind = "number"
        elif any(marker in kind for marker in text_markers):
            kind = "text"

        in_utterance = name.lower().replace("_", " ") in aligned_schema_entities
        foreign_key = column_is_foreign_key(spec)

        result.append(
            TableColumn(kind, name, wordpieces, table_name, foreign_key,
                        in_utterance))
    return result

Exemple #2

0

Afficher le fichier

Fichier : schema_utils.py Projet : yyht/language

def process_table(table_name, columns, tokenizer, aligned_schema_entities):
    """Builds a DatabaseTable for one schema table and fills in its fields.

    Args:
      table_name: Name of the table as it appears in the schema.
      columns: Column descriptions, forwarded to `process_columns`.
      tokenizer: Passed through unchanged to `get_wordpieces` and
        `process_columns`.
      aligned_schema_entities: Container queried with `in` to decide whether
        the table name matches the utterance.

    Returns:
      The populated DatabaseTable.
    """
    table = DatabaseTable()
    table.original_table_name = table_name

    # The name is compared lower-cased and with underscores as spaces.
    normalized_name = table.original_table_name.lower().replace('_', ' ')
    table.matches_to_utterance = normalized_name in aligned_schema_entities

    # Tokenize the name with underscores replaced by spaces; only the first
    # element of the get_wordpieces result is used.
    name_pieces = get_wordpieces(table_name.replace('_', ' '), tokenizer)[0]
    table.table_name_wordpieces.extend(name_pieces)

    column_objs = process_columns(columns, tokenizer,
                                  table.original_table_name,
                                  aligned_schema_entities)
    table.table_columns.extend(column_objs)

    return table

Exemple #3

0

Afficher le fichier

def process_table(table_name, columns, tokenizer,
                  aligned_schema_entities) -> DatabaseTable:
    """Builds a DatabaseTable describing one schema table.

    Args:
      table_name: Name of the table as it appears in the schema.
      columns: Column descriptions, forwarded to `process_columns`.
      tokenizer: Passed through unchanged to `get_wordpieces` and
        `process_columns`.
      aligned_schema_entities: Container queried with `in` to decide whether
        the table name matches the utterance.

    Returns:
      A DatabaseTable built from the table name, its wordpieces, its
      processed columns and the utterance-match flag, in that order.
    """
    # The name is compared lower-cased and with underscores as spaces.
    normalized_name = table_name.lower().replace("_", " ")
    in_utterance = normalized_name in aligned_schema_entities

    # Tokenize the name with underscores replaced by spaces; only the first
    # element of the get_wordpieces result is used.
    spaced_name = table_name.replace("_", " ")
    name_pieces = get_wordpieces(spaced_name, tokenizer)[0]

    column_objs = process_columns(columns, tokenizer, table_name,
                                  aligned_schema_entities)

    return DatabaseTable(table_name, name_pieces, column_objs, in_utterance)

Exemple #4

0

Afficher le fichier

Fichier : nl_to_sql_example.py Projet : samuelstevens/language

def populate_utterance(example: NLToSQLExample, schema: Schema, tokenizer) -> None:
    """Fills `example.model_input` with utterance wordpieces and tables.

    The original utterance is tokenized with `get_wordpieces` against the
    schema entities; the resulting wordpieces are appended to
    `utterance_wordpieces`, and the tables produced by `process_tables` are
    appended to `tables`.

    A UnicodeDecodeError raised anywhere in that sequence is printed rather
    than propagated, so the example may be left partially populated.
    """
    entities = get_schema_entities(schema)

    try:
        model_input = example.model_input

        # Utterance wordpieces, plus the schema entities aligned to them.
        tokenized = get_wordpieces(model_input.original_utterance, tokenizer,
                                   entities)
        pieces, aligned_entities = tokenized
        model_input.utterance_wordpieces.extend(pieces)

        # Table information, computed with the aligned entities.
        tables = process_tables(schema, tokenizer, aligned_entities)
        model_input.tables.extend(tables)
    except UnicodeDecodeError as err:
        print(err)

Exemple #5

0

Afficher le fichier

Fichier : schema_utils.py Projet : yyht/language

def process_columns(columns, tokenizer, table_name, aligned_schema_entities):
    """Converts every column description of a table into a TableColumn.

    Args:
      columns: Iterable of mappings; each one is read through its
        'field name' and 'type' keys and is also handed to
        `column_is_foreign_key`.
      tokenizer: Passed through unchanged to `get_wordpieces`.
      table_name: Name of the owning table, stored on every TableColumn.
      aligned_schema_entities: Container queried with `in`; a column counts
        as matching the utterance when its lower-cased name, with
        underscores turned into spaces, is a member.

    Returns:
      A list with one populated TableColumn per entry of `columns`, in
      input order.
    """
    numeric_markers = ('int', 'float', 'double', 'decimal')
    text_markers = ('varchar', 'longtext')

    result = []
    for spec in columns:
        entry = TableColumn()
        entry.original_column_name = spec['field name']

        # Tokenize the name with underscores replaced by spaces; only the
        # first element of the get_wordpieces result is used.
        spaced_name = entry.original_column_name.replace('_', ' ')
        entry.column_name_wordpieces.extend(
            get_wordpieces(spaced_name, tokenizer)[0])

        # Collapse the declared type into 'number' / 'text' when it contains
        # one of the known markers; any other type is kept lower-cased.
        kind = spec['type'].lower()
        if any(marker in kind for marker in numeric_markers):
            kind = 'number'
        elif any(marker in kind for marker in text_markers):
            kind = 'text'
        entry.column_type = kind
        entry.table_name = table_name

        normalized_name = entry.original_column_name.lower().replace('_', ' ')
        entry.matches_to_utterance = normalized_name in aligned_schema_entities

        entry.is_foreign_key = column_is_foreign_key(spec)
        result.append(entry)
    return result