Example #1
def create_database(database_engine: Engine, database_name: Text):
    """Try to connect to the database. Create it if it does not exist"""
    try:
        database_engine.connect()
    except sa.exc.OperationalError:
        # Fall back to a local SQLite file: connecting creates the file,
        # so no CREATE DATABASE statement is needed (or valid) for SQLite.
        default_db_url = f"sqlite:///{database_name}.db"
        default_engine = sa.create_engine(default_db_url)
        conn = default_engine.connect()
        conn.close()
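The commit-then-CREATE DATABASE sequence that this pattern is usually paired with belongs to server databases; SQLite simply creates its file on connect. A minimal sketch of the same create-if-missing pattern against PostgreSQL, where that idiom applies (the admin URL and the pg_database existence check are assumptions, not from the source):

import sqlalchemy as sa

def create_database_if_missing(admin_url: str, database_name: str) -> None:
    # CREATE DATABASE cannot run inside a transaction, hence AUTOCOMMIT
    engine = sa.create_engine(admin_url, isolation_level="AUTOCOMMIT")
    with engine.connect() as conn:
        exists = conn.execute(
            sa.text("SELECT 1 FROM pg_database WHERE datname = :name"),
            {"name": database_name},
        ).scalar()
        if not exists:
            conn.execute(sa.text(f'CREATE DATABASE "{database_name}"'))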
Example #2
def create_extension(bind: Engine, extension: str) -> None:
    """Setup an extension in postgresql.
    """
    logger.info('[plenario] Create extension %s' % extension)
    connection = bind.connect()
    connection.execute('create extension %s' % extension)
    connection.close()
Example #4
 def __init__(
     self,
     engine: Engine,
     table: Table,
 ) -> None:
     self.connection = engine.connect()
     self.table = table
Example #5
    def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
        if not AbstractPreprocessor._check_write_privilege(engine):
            AbstractPreprocessor.create_dir(self.output_dir, os.getlogin())
            path = Path.joinpath(self.output_dir, os.getlogin(),
                                 datetime.now().isoformat() + '.json')

            with path.open("a") as f:
                df.to_json(f)
            return
        for idx, row in df.iterrows():
            with engine.connect() as conn:
                t = Table('judgment_map', MetaData(), autoload_with=engine)
                if row['judgments']:  # only insert, when we find judgments
                    # Delete and reinsert as no upsert command is available
                    stmt = t.delete().where(delete_stmt_decisions_with_df(df))
                    conn.execute(stmt)
                    for k in row['judgments']:
                        judgment_type_id = Judgment(k).value
                        stmt = t.insert().values([{
                            "decision_id": str(row['decision_id']),
                            "judgment_id": judgment_type_id
                        }])
                        conn.execute(stmt)
                else:
                    self.logger.warning(
                        f"No judgments found for {row['html_url']}")
Example #6
 def coverage_get_total(self, engine: Engine, spider: str,
                        table: str) -> int:
     """
     Returns total amount of valid entries to be processed by extractor
     """
     sql_string = f"SELECT count(*) FROM {table} WHERE {self.get_database_selection_string(spider, table)}"
     return pd.read_sql(sql_string, engine.connect())["count"][0]
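The query above interpolates both the table name and the selection string into raw SQL. Table names cannot be bound parameters, but scalar values can; a hedged sketch of the safer shape (the spider column name is an assumption for illustration):

import pandas as pd
from sqlalchemy import text

def count_rows(engine, table: str, spider: str) -> int:
    # The table name must still be interpolated; the spider value is bound safely.
    query = text(f"SELECT count(*) AS count FROM {table} WHERE spider = :spider")
    return int(pd.read_sql(query, engine.connect(), params={"spider": spider})["count"][0])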
Example #7
def _get_view_names(engine: Engine, schema: str) -> list:
    """Return a list of view names, upper cased and prefixed by schema if needed."""
    with engine.connect() as conn:
        return [
            f"{f'{schema}.' if schema else ''}{view_name.upper()}"
            for view_name in engine.dialect.get_view_names(conn, schema)
        ]
Example #8
def add_lease(
    args,
    context: Context,
    engine: Engine,
) -> int:
    connection = engine.connect()
    values = obtain_lease_info(LeaseArguments.from_anonymous_args(args),
                               context,
                               missing_as_none=True)
    values = {
        k: (v if v is not None else text('DEFAULT'))
        for k, v in values.items()
    }
    ip, mac = values["IPAddress"], values["MAC"]
    logger.debug(
        "Inserting new lease for IP %s and MAC %s",
        ip,
        mac,
    )
    with connection.begin():
        # TODO: Use INSERT ON CONFLICT UPDATE on newer SQLAlchemy (>= 1.1)
        old_values = query_lease_for_update(connection, ip)
        if old_values is None:
            connection.execute(auth_dhcp_lease.insert(values=values))
        else:
            logger.warning("Lease for IP %s and MAC %s already exists", ip,
                           mac)
            perform_lease_update(connection, ip, mac, old_values, values)
    return os.EX_OK
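The TODO above points at INSERT ... ON CONFLICT for SQLAlchemy >= 1.1. A hedged sketch of what that could look like with the PostgreSQL dialect; the conflict target on IPAddress is inferred from the lease-by-IP lookup, not confirmed by the source:

from sqlalchemy.dialects.postgresql import insert

def upsert_lease(connection, values: dict) -> None:
    stmt = insert(auth_dhcp_lease).values(**values)
    stmt = stmt.on_conflict_do_update(
        index_elements=["IPAddress"],  # assumed unique constraint on the IP column
        set_={k: v for k, v in values.items() if k != "IPAddress"},
    )
    connection.execute(stmt)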
Example #9
def create_database(bind: Engine, database: str) -> None:
    """Setup a database (schema) in postgresql.
    """
    logger.info('[plenario] Create database %s' % database)
    connection = bind.connect()
    connection.execute('commit')
    connection.execute('create database %s' % database)
    connection.close()
Example #10
def drop_database(bind: Engine, database: str) -> None:
    """Drop a database (schema) in postgresql.
    """
    logger.info('[plenario] Drop database %s' % database)
    connection = bind.connect()
    connection.execute('commit')
    connection.execute('drop database %s' % database)
    connection.close()
Example #13
 def coverage_get_successful(self, engine: Engine, spider: str,
                             table: str) -> int:
     """Returns the total entries that got processed successfully"""
     query = (
         f"SELECT count({self.col_name}) FROM {table} WHERE "
         f"{self.get_database_selection_string(spider)} AND {self.col_name} <> 'null'"
     )
     return pd.read_sql(query, engine.connect())["count"][0]
Example #14
    def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
        with engine.connect() as conn:
            t = Table('lower_court', MetaData(), autoload_with=conn)
            # Delete and reinsert as no upsert command is available
            stmt = t.delete().where(delete_stmt_decisions_with_df(df))
            conn.execute(stmt)
            for _, row in df.iterrows():
                if 'lower_court' not in row or row['lower_court'] is None:
                    continue
                lower_court = row["lower_court"]
                res = {}

                if 'court' in lower_court and lower_court['court'] is not None:
                    res['court_id'] = list(
                        self.select(
                            engine, 'court', 'court_id',
                            f"court_string = '{lower_court['court']}'")
                    )[0]['court_id'][0]
                    res['court_id'] = int(
                        res['court_id']
                    ) if res['court_id'] is not None else None
                if 'canton' in lower_court and lower_court['canton'] is not None:
                    res['canton_id'] = list(
                        self.select(engine, 'canton', 'canton_id',
                                    f"short_code = '{lower_court['canton']}'")
                    )[0]['canton_id'][0]
                    res['canton_id'] = int(
                        res['canton_id']
                    ) if res['canton_id'] is not None else None
                if 'chamber' in lower_court and lower_court['chamber'] is not None:
                    res['chamber_id'] = list(
                        self.select(
                            engine, 'chamber', 'chamber_id',
                            f"chamber_string = '{lower_court['chamber']}'")
                    )[0]['chamber_id'][0]
                    res['chamber_id'] = int(
                        res['chamber_id']
                    ) if res['chamber_id'] is not None else None

                    # NOTE: the insert only runs when a chamber was found
                    stmt = t.insert().values([{
                        "decision_id": str(row['decision_id']),
                        "court_id": res.get('court_id'),
                        "canton_id": res.get('canton_id'),
                        "chamber_id": res.get('chamber_id'),
                        "date": lower_court.get('date'),
                        "file_number": lower_court.get('file_number')
                    }])
                    conn.execute(stmt)
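The three lookup blocks above share one shape: select a single id column, take the first value, coerce to int. A hypothetical helper, relying on the same self.select signature used above, could collapse them:

def _lookup_id(self, engine, table: str, id_col: str, where: str):
    # self.select is assumed to yield DataFrame chunks, as in the code above
    rows = list(self.select(engine, table, id_col, where))
    value = rows[0][id_col][0] if rows else None
    return int(value) if value is not None else None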
Example #15
def is_engine_available(engine: Engine) -> bool:
    """
    Проверяет, что соединение с sqlalcyemy-egine открывается.
    """
    try:
        conn = engine.connect()
        conn.detach()
    except SQLAlchemyError:
        return False
    return True
Example #16
def print_leases(
    args,
    context: Context,
    engine: Engine,
) -> int:
    """Print all leases in dnsmasq leasefile format"""
    connection = engine.connect()
    with connection.begin():
        leases = get_all_auth_dhcp_leases(connection)
    context.stdout.writelines(generate_leasefile_lines(leases))
    return os.EX_OK
Example #17
def _drop_database(engine: Engine, dbname: str) -> None:
    """
    Drop a test database.

    :param engine: Engine returned by test_engine()
    :param dbname: Database name to drop
    """
    aurweb.schema.metadata.drop_all(bind=engine)
    conn = engine.connect()
    conn.execute(f"DROP DATABASE {dbname}")
    conn.close()
Example #18
def execute_query(engine: Engine, query: str) -> List[Tuple[Any]]:
    """
    Execute DB queries safely.
    """

    # Connect outside the try block so `connection` is always bound in finally
    connection = engine.connect()
    try:
        logging.warning(f"Running query on Snowflake: \n{query}")
        results = connection.execute(query).fetchall()
    finally:
        connection.close()
        engine.dispose()
    return results
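Hypothetical usage, assuming SQLAlchemy 1.x where plain SQL strings are accepted by Connection.execute (the SQLite URL is for illustration; any engine works despite the Snowflake log line):

from sqlalchemy import create_engine

engine = create_engine("sqlite:///example.db")
rows = execute_query(engine, "SELECT 1")
print(rows)  # [(1,)]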
Example #19
def _create_database(engine: Engine, dbname: str) -> None:
    """
    Create a test database.

    :param engine: Engine returned by test_engine()
    :param dbname: Database name to create
    """
    conn = engine.connect()
    try:
        conn.execute(f"CREATE DATABASE {dbname}")
    except ProgrammingError:  # pragma: no cover
        # The database most likely already existed if we hit
        # a ProgrammingError. Just drop the database and try
        # again. If at that point things still fail, any
        # exception will be propagated up to the caller.
        conn.execute(f"DROP DATABASE {dbname}")
        conn.execute(f"CREATE DATABASE {dbname}")
    conn.close()
    initdb.run(AlembicArgs)
Example #20
def fast_append_to_sql(
    df: pd.DataFrame,
    engine: Engine,
    table_type: Union[Type[TemptableOfficialNoLocation],
                      Type[TemptableOfficialHasLocation]],
):
    table = table_type.__table__
    cols = [x.name for x in table.columns if x.name != "id"]
    colnames = [f'"{x}"' for x in cols]
    temp_df = df.reset_index()

    # make sure we have the columns
    have_cols = set(list(temp_df))
    missing_cols = set(cols) - have_cols
    if len(missing_cols) > 0:
        msg = "Missing columns {}".format(", ".join(list(missing_cols)))
        raise ValueError(msg)

    with closing(engine.connect()) as con:

        if engine.dialect.name == "postgresql":
            dest = ("{}.{}".format(table.schema, table.name)
                    if table.schema is not None else table.name)
            with io.StringIO() as csv:
                temp_df.to_csv(csv,
                               sep="\t",
                               columns=cols,
                               index=False,
                               header=False)
                csv.seek(0)
                with closing(con.connection.cursor()) as cur:
                    # Raw DBAPI (psycopg2) cursor: COPY FROM for fast bulk load
                    cur.copy_from(csv, dest, columns=colnames, null="")
                    cur.connection.commit()
        elif engine.dialect.name == "sqlite":
            # pandas is ok for sqlite
            temp_df[cols].to_sql(table.name,
                                 engine,
                                 if_exists="append",
                                 index=False,
                                 chunksize=500_000)
        else:
            raise NotImplementedError(
                "Only implemented for sqlite and postgres")
Example #21
def update_lease(
    args,
    context: Context,
    engine: Engine,
) -> int:
    connection = engine.connect()
    values = obtain_lease_info(LeaseArguments.from_anonymous_args(args),
                               context,
                               missing_as_none=False)
    values.setdefault('UpdatedAt', text('DEFAULT'))
    ip, mac = values["IPAddress"], values["MAC"]
    logger.debug("Updating lease for IP %s and MAC %s", ip, mac)
    with connection.begin():
        # TODO: Use INSERT ON CONFLICT UPDATE on newer SQLAlchemy (>= 1.1)
        old_values = query_lease_for_update(connection, ip)
        if old_values is None:
            connection.execute(auth_dhcp_lease.insert(values=values))
        else:
            perform_lease_update(connection, ip, mac, old_values, values)
    return os.EX_OK
Example #22
 def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
     for idx, row in df.iterrows():
         with engine.connect() as conn:
             t = Table('citation', MetaData(), autoload_with=engine)
             # Delete and reinsert as no upsert command is available
             stmt = t.delete().where(delete_stmt_decisions_with_df(df))
             conn.execute(stmt)  # execute on the connection, not the engine
             for k in row['citations'].keys():
                 citation_type_id = CitationType(k).value
                 for citation in row['citations'][k]:
                     stmt = t.insert().values([{
                         "decision_id": str(row['decision_id']),
                         "citation_type_id": citation_type_id,
                         "url": citation.get("url"),
                         "text": citation["text"]
                     }])
                     conn.execute(stmt)
Example #23
def delete_lease(
    args,
    context: Context,
    engine: Engine,
) -> int:
    connection = engine.connect()
    values = obtain_lease_info(LeaseArguments.from_anonymous_args(args),
                               context,
                               missing_as_none=False)
    ip, mac = values["IPAddress"], values["MAC"]
    logger.debug("Deleting lease for IP %s and MAC %s", ip, mac)
    query = auth_dhcp_lease.delete().where(auth_dhcp_lease.c.IPAddress == ip)
    with connection.begin():
        result = connection.execute(query)
    if result.rowcount != 1:
        logger.warning(
            "Unexpected row count %d while deleting lease for IP %s and MAC %s",
            result.rowcount,
            ip,
            mac,
        )
    return os.EX_OK
Example #24
def exec_sql(engine: Engine, path: Path):
    """
    Call this function to execute the SQL statements within a file against
    your database.

    :param engine: the engine connected to the database
    :param path: the path to the file containing your SQL statements
    """
    with engine.connect() as connection:
        logger: logging.Logger = logging.getLogger(__name__)
        for sql_stmt in sqlparse.split(path.read_text().strip()):
            # Parse the statement so that we may detect comments.
            sqlp = sqlparse.parse(sql_stmt)
            # If the parsed statement has only one token and its statement
            # type is 'unknown'...
            if len(sqlp) == 1 and sqlp[0].get_type() == 'UNKNOWN':
                # ...move along.  This is likely a comment and will cause an
                # exception if we try to execute it by itself.
                continue
            # We're all set.  Execute the statement.
            logger.debug(sql_stmt)
            connection.execute(sql_stmt)
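Hypothetical usage, with the database URL and file name assumed:

from pathlib import Path
from sqlalchemy import create_engine

engine = create_engine("sqlite:///example.db")
exec_sql(engine, Path("schema.sql"))  # runs each non-comment statement in the file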
Example #25
def price_network_endpoint(
    engine: Engine = Depends(get_database_engine),
    network_code: str = Path(..., description="Network code"),
    network_region: Optional[str] = Query(None, description="Network region code"),
    forecasts: bool = Query(False, description="Include price forecasts"),
) -> OpennemDataSet:
    """Returns network and network region price info for interval which defaults to network
    interval size

    Args:
        engine ([type], optional): Database engine. Defaults to Depends(get_database_engine).

    Raises:
        HTTPException: No results

    Returns:
        OpennemData: data set
    """
    engine = get_database_engine()

    network = None

    try:
        network = network_from_network_code(network_code)
    except Exception:
        raise HTTPException(detail="Network not found", status_code=status.HTTP_404_NOT_FOUND)

    interval_obj = human_to_interval("5m")
    period_obj = human_to_period("1d")

    scada_range = get_balancing_range(network=network, include_forecasts=forecasts)

    if not scada_range:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Could not find a date range",
        )

    if not network:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Network not found",
        )

    time_series = TimeSeries(
        start=scada_range.start,
        network=network,
        interval=interval_obj,
        period=period_obj,
    )

    if network_region:
        time_series.network.regions = [NetworkNetworkRegion(code=network_region)]

    query = network_region_price_query(time_series=time_series)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    if len(row) < 1:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No results",
        )

    result_set = [
        DataQueryResult(interval=i[0], result=i[3], group_by=i[2] if len(i) > 1 else None)
        for i in row
    ]

    result = stats_factory(
        result_set,
        network=time_series.network,
        period=time_series.period,
        interval=time_series.interval,
        units=get_unit("price"),
        group_field="price",
        include_group_code=True,
        include_code=True,
    )

    if not result or not result.data:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No results",
        )

    return result
Example #26
 def read_column(self, engine: Engine, spider: str, name: str,
                 lang: str) -> int:
     query = f"SELECT count({name}) FROM {lang} WHERE {self.get_database_selection_string(spider, lang)} AND {name} <> ''"
     return pd.read_sql(query, engine.connect())['count'][0]
Example #27
async def test_connection(engine: Engine):
    try:
        con = engine.connect()
        con.close()
        return True
    except SQLAlchemyError:  # SQLAlchemy raises its own errors, not ConnectionError
        return False
Example #28
def fueltech_demand_mix(
    engine: Engine = Depends(get_database_engine),  # type: ignore
    network_code: str = Query(..., description="Network code"),
) -> OpennemDataSet:
    """Return fueltech proportion of demand for a network

    Args:
        engine ([type], optional): Database engine. Defaults to Depends(get_database_engine).

    Raises:
        HTTPException: No results

    Returns:
        OpennemData: data set

    @TODO optimize this query
    """
    engine = get_database_engine()

    network = None

    try:
        network = network_from_network_code(network_code)
    except Exception:
        raise HTTPException(detail="Network not found", status_code=status.HTTP_404_NOT_FOUND)

    interval_obj = human_to_interval("5m")
    period_obj = human_to_period("1d")

    scada_range = get_scada_range(network=network)

    if not scada_range:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Could not find a date range",
        )

    if not network:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Network not found",
        )

    time_series = TimeSeries(
        start=scada_range.start,
        network=network,
        interval=interval_obj,
        period=period_obj,
    )

    query = network_fueltech_demand_query(time_series=time_series)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    if len(row) < 1:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No results",
        )

    result_set = [
        DataQueryResult(interval=i[0], result=i[2], group_by=i[1] if len(i) > 1 else None)
        for i in row
    ]

    result = stats_factory(
        result_set,
        network=time_series.network,
        period=time_series.period,
        interval=time_series.interval,
        units=get_unit("emissions_factor"),
        group_field="emission_factor",
        include_group_code=True,
        include_code=True,
    )

    if not result or not result.data:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No results",
        )

    return result
Example #29
    def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
        with engine.connect() as conn:
            t_person = Table('person', MetaData(), autoload_with=conn)
            t_jud_person = Table('judicial_person',
                                 MetaData(),
                                 autoload_with=conn)

            # Delete and reinsert as no upsert command is available
            stmt = t_jud_person.delete().where(
                delete_stmt_decisions_with_df(df))
            conn.execute(stmt)

            for _, row in df.iterrows():
                if 'court_composition' not in row or row['court_composition'] is None:
                    continue
                court_composition: CourtComposition = row['court_composition']
                president = court_composition.president
                if president:
                    # create president person
                    president_dict = {
                        "name": president.name.strip(),
                        "is_natural_person": True,
                        "gender": president.gender.value[0] if president.gender else None
                    }
                    stmt = t_person.insert().returning(
                        text("person_id")).values([president_dict])
                    person_id = conn.execute(stmt).fetchone()['person_id']
                    person_dict = {
                        "decision_id": str(row['decision_id']),
                        "person_id": person_id,
                        "judicial_person_type_id": 1,
                        "is_president": True
                    }
                    stmt = t_jud_person.insert().values([person_dict])
                    conn.execute(stmt)

                # create all judges
                for judge in court_composition.judges:
                    if judge == president:  # President is already created above
                        continue
                    judge_dict = {
                        "name": judge.name.strip(),
                        "is_natural_person": True,
                        "gender": judge.gender.value[0] if judge.gender else None
                    }
                    stmt = t_person.insert().returning(
                        text("person_id")).values([judge_dict])
                    person_id = conn.execute(stmt).fetchone()['person_id']
                    person_dict = {
                        "decision_id": str(row['decision_id']),
                        "person_id": person_id,
                        "judicial_person_type_id": 1,
                        "is_president": False
                    }
                    stmt = t_jud_person.insert().values([person_dict])
                    conn.execute(stmt)

                # create all clerks
                for clerk in court_composition.clerks:
                    clerk_dict = {
                        "name": clerk.name.strip(),
                        "is_natural_person": True,
                        "gender": clerk.gender.value[0] if clerk.gender else None
                    }
                    stmt = t_person.insert().returning(
                        text("person_id")).values([clerk_dict])
                    person_id = conn.execute(stmt).fetchone()['person_id']
                    person_dict = {
                        "decision_id": str(row['decision_id']),
                        "person_id": person_id,
                        "judicial_person_type_id": 2,
                        "is_president": False
                    }
                    stmt = t_jud_person.insert().values([person_dict])
                    conn.execute(stmt)

            where = f"WHERE NOT EXISTS (SELECT 1 FROM judicial_person b WHERE a.person_id = b.person_id) " \
                    f"AND NOT EXISTS (SELECT 1 FROM party c WHERE a.person_id = c.person_id)"
            stmt = t_person.delete().where(text(where))
Example #30
    def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
        """
            'plaintiff': 1,
            'defendant': 2,
            'representation_plaintiff': 3,
            'representation_defendant': 4
        """
        for idx, row in df.iterrows():
            with engine.connect() as conn:
                t_person = Table('person', MetaData(), autoload_with=conn)
                t_party = Table('party', MetaData(), autoload_with=conn)

                # Delete person
                # Delete and reinsert as no upsert command is available
                stmt = t_party.delete().where(delete_stmt_decisions_with_df(df))
                conn.execute(stmt)

                parties = json.loads(row['parties'])

                # create all plaintiffs
                offset = 0
                for plaintiff in parties.get('plaintiffs'):
                    # Insert person into the person table
                    plaintiff_dict = {"name": plaintiff['name'].strip(),
                                      "is_natural_person": plaintiff['legal_type'] == 'natural person',
                                      "gender": plaintiff['gender']}
                    stmt = t_person.insert().returning(column("person_id")).values([plaintiff_dict])
                    person_id = conn.execute(stmt).fetchone()['person_id']
                    # Then insert the person into the party table
                    stmt = t_party.insert().values(
                        [{"decision_id": str(row['decision_id']), "person_id": person_id, "party_type_id": 1 + offset}])
                    conn.execute(stmt)
                    for representant in plaintiff.get('legal_counsel'):
                        # Insert their representation into the person and party tables
                        representant_dict = {"name": representant['name'].strip(),
                                             "is_natural_person": representant['legal_type'] == 'natural person',
                                             "gender": representant['gender']}
                        stmt = t_person.insert().returning(text("person_id")).values([representant_dict])
                        person_id = conn.execute(stmt).fetchone()['person_id']
                        stmt = t_party.insert().values([{"decision_id": str(row['decision_id']), "person_id": person_id,
                                                         "party_type_id": 3 + offset}])
                        conn.execute(stmt)

                # create all defendants
                offset = 1
                for defendant in parties.get('defendants'):
                    # Insert person into the person table
                    defendant_dict = {"name": plaintiff['name'].strip(),
                                      "is_natural_person": plaintiff['legal_type'] == 'natural person',
                                      "gender": plaintiff['gender']}
                    stmt = t_person.insert().returning(text("person_id")).values([defendant_dict])
                    person_id = conn.execute(stmt).fetchone()['person_id']
                    # Then insert the person into the party table
                    stmt = t_party.insert().values(
                        [{"decision_id": str(row['decision_id']), "person_id": person_id, "party_type_id": 1 + offset}])
                    conn.execute(stmt)
                    for representant in defendant.get('legal_counsel'):
                        # Insert their representation into the person and party tables
                        representant_dict = {"name": representant['name'].strip(),
                                             "is_natural_person": representant['legal_type'] == 'natural person',
                                             "gender": representant['gender']}
                        stmt = t_person.insert().returning(text("person_id")).values([representant_dict])
                        person_id = conn.execute(stmt).fetchone()['person_id']
                        stmt = t_party.insert().values([{"decision_id": str(row['decision_id']), "person_id": person_id,
                                                         "party_type_id": 3 + offset}])
                        conn.execute(stmt)

        with engine.connect() as conn:
            stmt = t_person.delete().where(text(
                "NOT EXISTS (SELECT FROM judicial_person jp WHERE jp.person_id = person.person_id "
                "UNION SELECT FROM party WHERE party.person_id = person.person_id)"))
            conn.execute(stmt)
Example #31
def run(engine: Engine):
    conn = engine.connect()
    ctx = MigrationContext.configure(conn)
    op = Operations(ctx)
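The snippet stops after building the Operations object. A hedged continuation (table and column names are hypothetical) showing how such an op can issue DDL against the live connection:

import sqlalchemy as sa
from alembic.migration import MigrationContext
from alembic.operations import Operations
from sqlalchemy.engine import Engine

def run(engine: Engine):
    conn = engine.connect()
    ctx = MigrationContext.configure(conn)
    op = Operations(ctx)
    # Hypothetical DDL via the Alembic operations API
    op.add_column('my_table', sa.Column('new_col', sa.String(), nullable=True))
    conn.close()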
Example #32
def save_from_text_to_database(engine: Engine, df: pd.DataFrame):
    """ Saving these fields
            Column('language', String),
            Column('chamber', String),
            Column('date', Date),
            Column('file_name', String),
            Column('file_number', String),
            Column('file_number_additional', String),
            Column('html_url', String),
            Column('html_raw', String),
            Column('pdf_url', String),
            Column('pdf_raw', String),
    """
    def save_to_db(df: pd.DataFrame, table: str):
        # If a Series is passed instead of a DataFrame, convert it to a frame and
        # transpose so the columns line up (edge case for a few courts).
        if not isinstance(df, pd.DataFrame):
            df = df.to_frame()
            df = df.T
        df.to_sql(table, engine, if_exists="append", index=False)

    def add_ids_to_df_for_decision(series: pd.Series) -> pd.Series:
        query = f"SELECT file_id FROM file WHERE file_name = '{series['file_name']}'"
        series['file_id'] = pd.read_sql(query, engine.connect())["file_id"][0]
        series['language_id'] = -1
        query = f"SELECT chamber_id FROM chamber WHERE chamber_string = '{series['chamber']}'"
        chamber_id = pd.read_sql(query, engine.connect())['chamber_id']
        if len(chamber_id) == 0:
            raise ValueError(
                f"The chamber {series['chamber']} was not found in the database. "
                f"Add it with the respective court and spider")
        series['chamber_id'] = chamber_id[0]

        series['decision_id'] = uuid.uuid5(uuid.UUID(int=0),
                                           series['file_name'])
        # TODO: Add topic recognition, similar to the title of the court decision
        series['topic'] = ''
        return series

    def save_the_file_numbers(series: pd.Series) -> pd.Series:
        """
        Saves the file_number for each of the decision ids
        :param series:
        :return:
        """
        query = f"SELECT decision_id FROM decision WHERE file_id = '{series['file_id']}'"
        series['decision_id'] = pd.read_sql(query,
                                            engine.connect())["decision_id"][0]
        with engine.connect() as conn:
            t = Table('file_number', MetaData(), autoload_with=engine)
            # Delete and reinsert as no upsert command is available
            stmt = t.delete().where(delete_stmt_decisions_with_df(series))
            conn.execute(stmt)
        series['text'] = series['file_number'].strip()
        save_to_db(series[['decision_id', 'text']], 'file_number')
        if ('file_number_additional' in series
                and series['file_number_additional'] is not None
                and len(series['file_number_additional']) > 0):
            series['text'] = series['file_number_additional'].strip()
            save_to_db(series[['decision_id', 'text']], 'file_number')
        return series

    if df.empty:
        return

    # Delete old decision and file entries
    with engine.connect() as conn:
        t_fil = Table('file', MetaData(), autoload_with=engine)
        t_dec = Table('decision', MetaData(), autoload_with=engine)
        file_name_list = ','.join(
            ["'" + str(item) + "'" for item in df['file_name'].tolist()])
        stmt = t_fil.select().where(text(f"file_name in ({file_name_list})"))
        file_ids = [item['file_id'] for item in conn.execute(stmt).all()]
        if len(file_ids) > 0:
            file_ids_list = ','.join(
                ["'" + str(item) + "'" for item in file_ids])
            # decision_ids = [item['decision_id'] for item in conn.execute(t_dec.select().where(text(f"file_id in ({file_ids_list})"))).all()]

            stmt = t_dec.delete().where(text(f"file_id in ({file_ids_list})"))
            conn.execute(stmt)
            stmt = t_fil.delete().where(text(f"file_id in ({file_ids_list})"))
            conn.execute(stmt)

    save_to_db(df[['file_name', 'html_url', 'pdf_url', 'html_raw', 'pdf_raw']],
               'file')

    df = df.apply(add_ids_to_df_for_decision, 1)

    # Convert pandas NaN/NaT missing values to None so they are stored as NULL
    df = df.replace({np.NaN: None})
    df['date'] = df['date'].replace(r'^\s*$', None, regex=True)
    df['date'] = df['date'].astype('datetime64[ns]')
    save_to_db(df[['language_id', 'chamber_id', 'file_id', 'date', 'topic']],
               'decision')
    df.apply(save_the_file_numbers, 1)
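A quick check of the uuid5 trick used in add_ids_to_df_for_decision: with a fixed namespace, the decision id is a pure function of the file name, so re-running the pipeline maps the same file to the same id (the file name below is hypothetical):

import uuid

a = uuid.uuid5(uuid.UUID(int=0), 'CH_BGer_001.html')
b = uuid.uuid5(uuid.UUID(int=0), 'CH_BGer_001.html')
assert a == b  # same namespace + same name -> same UUID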