def fill_missing_values(session: Session, only_kegg: bool, batch_size: int, error_log: str) -> None:
    """
    Fill in missing mass and/or atom-bag data derived from InChI strings.

    Compounds that have an InChI but lack a mass or an atom bag are loaded
    in batches; each batch gets molecular masses computed and a per-compound
    atom bag parsed from its InChI, then the rows are bulk-updated.

    Parameters
    ----------
    session : sqlalchemy.orm.session.Session
        An active session in order to communicate with a SQL database.
    only_kegg : bool
        Calculate thermodynamic information for compounds contained in
        KEGG only.
    batch_size : int
        The size of batches of compounds considered at a time.
    error_log : str
        The base file path for error output.
    """
    query = session.query(Compound.id, Compound.mnx_id, Compound.inchi)
    if only_kegg:
        # Filter compounds in KEGG or COCO (additional compounds for
        # component-contribution).
        query = (
            query.join(CompoundIdentifier)
            .join(Registry)
            .filter(Registry.namespace.in_(("kegg", "coco")))
            .group_by(Compound.id)
        )
    # Only rows that have an InChI but are missing mass or atom bag.
    query = query.filter(
        Compound.inchi.isnot(None),
        or_(Compound.mass.is_(None), Compound.atom_bag.is_(None)),
    )
    logger.debug("calculating mass for compounds with missing values")
    input_df = pd.read_sql_query(query.statement, query.session.bind)
    with tqdm(total=len(input_df), desc="Analyzed") as pbar:
        for index in range(0, len(input_df), batch_size):
            batch = input_df.iloc[index:index + batch_size, :]
            try:
                # Annotate the batch with computed molecular masses; batch
                # errors are written to a per-batch error log file.
                batch = get_molecular_masses(batch, f"{error_log}_batch_{index}")
                updates = []
                for record in batch.itertuples(index=False):
                    try:
                        bag = get_atom_bag("inchi", record.inchi)
                    except OSError as error:
                        # A failed parse degrades gracefully to an empty bag.
                        logger.warning(str(error))
                        bag = {}
                    updates.append(
                        {"id": record.id, "mass": record.mass, "atom_bag": bag}
                    )
                session.bulk_update_mappings(Compound, updates)
                # Commit per batch so progress survives a later failure.
                session.commit()
            except ValueError as error:
                # Skip the whole batch on a mass-calculation failure.
                logger.warning(str(error))
            pbar.update(len(batch))
def update_headlines(session: Session, user_dict: Path, logger: Logger) -> None:
    """
    Process all not-yet-classified headlines and persist the results.

    Headlines whose ``is_used`` flag is still NULL are simplified and then
    either marked unused (not interesting, or not about the domestic index)
    or tokenized, price-tagged, and marked used.

    Parameters
    ----------
    session : Session
        An active SQLAlchemy session used to read and bulk-update headlines.
    user_dict : Path
        Path to the user dictionary passed to the tokenizer.
    logger : Logger
        Logger for progress messages.
    """
    # `.all()` already returns a list; no extra copy is needed.
    headlines = session \
        .query(Headline) \
        .filter(Headline.is_used.is_(None)) \
        .all()
    if not headlines:
        return
    tokenizer = Tokenizer(str(user_dict))
    mappings = []
    logger.info('start updating headlines')
    for headline in tqdm(headlines):
        h = simplify_headline(headline.headline)
        is_about_di = headline.categories is not None and \
            DOMESTIC_INDEX in headline.categories
        # We stopped using `is_template` because the size of the dataset
        # decreased and the result got worse.
        # if is_template(h) or not is_interesting(h) or not is_about_di:
        if not is_interesting(h) or not is_about_di:
            mappings.append({
                'article_id': headline.article_id,
                'is_used': False
            })
            continue
        # Normalize kanji numerals to digits before tagging prices.
        tokens = kansuuzi2number(
            [token.surface for token in tokenizer.tokenize(h)])
        tag_tokens = replace_prices_with_tags(tokens)
        mappings.append({
            'article_id': headline.article_id,
            'simple_headline': h,
            'tokens': tokens,
            'tag_tokens': tag_tokens,
            'is_used': True,
        })
    session.bulk_update_mappings(Headline, mappings)
    session.commit()
    logger.info('end updating headlines')