def extract_insolvencies(creditor_ids):
    log.info("Extracting insolvencies...")

    insolvencies_df = sql2df("""
        SELECT it3.id AS insolvency_id, it3.debtor_name AS name, it3.ico, 
                creditor_name2creditor_id(it3.debtor_name) AS string_id, it3.birth_number_hash_code,
                it3.person_type AS person_type, 
                it3.reference_number AS reference_number, it3.region_id AS region_id, 
                it3.proposal_timestamp::DATE AS date
            FROM insolvency_tab it3  
            JOIN (SELECT it.id 
                FROM insolvency_tab it 
                    JOIN v_creditors_receivables ft2 ON it.id=ft2.insolvency_id 
                WHERE ft2.creditor_string_id is not null 
                    AND ft2.creditor_string_id = ANY(%(creditor_ids)s) 
                GROUP BY it.id) as insolvencies 
            ON it3.id=insolvencies.id
        """,
                             creditor_ids=creditor_ids).drop_duplicates()

    insolvencies_df = insolvencies_df[
        insolvencies_df["date"] < MAX_DATE].copy()
    insolvencies_df = normalize_df_by_ins_id(insolvencies_df)
    insolvencies_df = insolvencies_df.groupby(
        "insolvency_id").first().reset_index()

    insolvencies_df["debtor_id"] = insolvencies_df.ico\
        .apply(lambda i: None if pd.isnull(i) else int(i))\
        .fillna(insolvencies_df.birth_number_hash_code\
                           .apply(lambda n: None if pd.isnull(n) else int(n)))\
        .fillna(insolvencies_df.string_id)
    assert insolvencies_df.debtor_id.isna().sum() == 0
    log.info("Extracted {} insolvency records".format(len(insolvencies_df)))

    return insolvencies_df
def extract_administrator_creditor_edges(insolvency_end_dates_df,
                                         creditor_ids):
    log.info("Converting to administrator -> creditor edges...")
    administrator_creditor_edges_df = sql2df(
        """
        SELECT ft.insolvency_id, administrator_id, creditor_string_id, start_date::DATE, end_date::DATE
        FROM insolvencies_administrators_tab iat
             JOIN file_tab ft ON ft.insolvency_id = iat.insolvency_id
        WHERE ft.creditor_string_id IS NOT NULL 
              AND ft.creditor_string_id = ANY(%(creditor_ids)s)
    """,
        creditor_ids=creditor_ids).drop_duplicates()

    administrator_creditor_edges_df[
        "administrator_id"] = administrator_creditor_edges_df[
            "administrator_id"].apply(lambda id_: "adm_%d" % id_)
    administrator_creditor_edges_df = normalize_df_by_ins_id(
        administrator_creditor_edges_df)
    administrator_creditor_edges_df = administrator_creditor_edges_df.drop_duplicates(
    )

    administrator_creditor_edges_df = administrator_creditor_edges_df\
        .merge(insolvency_end_dates_df, on="insolvency_id", how="left")
    administrator_creditor_edges_df["end_date"] = administrator_creditor_edges_df\
        .end_date_x.fillna(administrator_creditor_edges_df.end_date_y)

    administrator_creditor_edges_df = administrator_creditor_edges_df\
        .rename(columns={"administrator_id": "source_id", "creditor_string_id": "target_id"})\
        [["source_id", "target_id", "start_date", "end_date"]]
    administrator_creditor_edges_df["edge_type"] = "administrator_creditor"
    log.info("Extracted {} edge records".format(
        len(administrator_creditor_edges_df)))

    return administrator_creditor_edges_df
def extract_creditors():
    log.info("Extracting creditors...")
    creditor_df = sql2df(
        "SELECT creditor_string_id AS id, max(creditor) as name, count(*) as count  \
        FROM v_creditors_receivables ft \
        GROUP BY creditor_string_id \
        ORDER BY count DESC \
        LIMIT 100").drop_duplicates().drop(labels=["count"], axis=1)
    creditor_df["node_type"] = "creditor"
    log.info("Extracted {} creditor records".format(len(creditor_df)))
    return creditor_df[["id", "name", "node_type"]]
def extract_insolvency_end_dates():
    insolvency_end_dates_df = sql2df("""
        SELECT insolvency_id, MAX(state_change_timestamp)::DATE AS end_date 
        FROM current_insolvency_states_tab cist 
            JOIN insolvency_states_types_tab istt ON cist.state = istt.id
        WHERE istt.text_identifier = 'ODSKRTNUTA' GROUP BY insolvency_id""")
    insolvency_end_dates_df = normalize_df_by_ins_id(insolvency_end_dates_df)
    insolvency_end_dates_df = insolvency_end_dates_df.groupby(
        "insolvency_id").max()

    return insolvency_end_dates_df
def extract_insolvency_creditor_edges():
    insolvency_creditor_edges_df = sql2df("""
        SELECT insolvency_id, creditor_string_id AS creditor_id, 
            proposal_timestamp::DATE AS start_date 
        FROM v_creditors_receivables 
            JOIN insolvency_tab it ON insolvency_id=it.id 
        WHERE creditor_string_id IS NOT NULL""")
    insolvency_creditor_edges_df = normalize_df_by_ins_id(
        insolvency_creditor_edges_df)
    insolvency_creditor_edges_df = insolvency_creditor_edges_df.drop_duplicates(
    )

    return insolvency_creditor_edges_df
def extract_insolvency_administrator_edges():
    log.info("Extracting insolvency -> administrator edges...")
    insolvency_administrator_edges_df = sql2df(
        "SELECT it.id AS insolvency_id, iat.administrator_id, start_date::DATE, end_date::DATE \
         FROM insolvencies_administrators_tab iat JOIN insolvency_tab it ON iat.insolvency_id=it.id"
    ).drop_duplicates()

    insolvency_administrator_edges_df[
        "administrator_id"] = insolvency_administrator_edges_df[
            "administrator_id"].apply(lambda id_: "adm_%d" % id_)
    insolvency_administrator_edges_df = normalize_df_by_ins_id(
        insolvency_administrator_edges_df)
    insolvency_administrator_edges_df = insolvency_administrator_edges_df.drop_duplicates(
    )
    log.info("Extracted {} edge records".format(
        len(insolvency_administrator_edges_df)))
    return insolvency_administrator_edges_df
def extract_administrators(creditor_ids):
    log.info("Extracting administrators...")
    administrators_df = sql2df("""
        SELECT at.id AS id, at.name AS name 
        FROM administrators_tab at 
        JOIN insolvencies_administrators_tab iat ON iat.administrator_id=at.id 
        JOIN (SELECT it.id 
            FROM insolvency_tab it 
                JOIN v_creditors_receivables ft2 ON it.id=ft2.insolvency_id 
            WHERE ft2.creditor_string_id is not null 
                AND ft2.creditor_string_id = ANY(%(creditor_ids)s) 
            GROUP BY it.id) as its 
        ON iat.insolvency_id=its.id 
        GROUP BY at.id, at.name""",
                               creditor_ids=creditor_ids).drop_duplicates()
    administrators_df["id"] = administrators_df["id"].apply(
        lambda id_: "adm_%d" % id_)
    administrators_df["node_type"] = "administrator"
    log.info("Extracted {} administrator records".format(
        len(administrators_df)))

    return administrators_df[["id", "name", "node_type"]]