Example 1
File: import.py Project: cceh/ntg
def import_att_fdw (dbsrc, dbdest, parameters):
    """Import att and lac tables from mysql.

    Import the (28 * 2) mysql tables to 2 tables in the postgres database.

    """

    log (logging.INFO, "  Importing mysql att tables ...")

    dbsrc_meta = sqlalchemy.schema.MetaData (bind = dbsrc.engine)
    dbsrc_meta.reflect ()

    with dbdest.engine.begin () as dest:
        concat_tables_fdw (dest, dbsrc_meta, 'original_att', 'app_fdw', config['MYSQL_ATT_TABLES'])

    with dbdest.engine.begin () as dest:
        if config.get ('MYSQL_LAC_TABLES'):
            log (logging.INFO, "  Importing mysql lac tables ...")
            concat_tables_fdw (dest, dbsrc_meta, 'original_lac', 'app_fdw', config['MYSQL_LAC_TABLES'])
        else:
            # no lacuna tables provided (e.g. John)
            execute (dest, """
            DROP TABLE IF EXISTS original_lac;
            CREATE TABLE original_lac (LIKE original_att);
            """, parameters)

    with dbdest.engine.begin () as dest:
        execute (dest, """
        ALTER TABLE original_att RENAME COLUMN anfadr TO begadr;
        ALTER TABLE original_lac RENAME COLUMN anfadr TO begadr;
        """, parameters)
Example 2
File: import.py Project: cceh/ntg
def import_nestle_fdw (dbsrc, dbdest, parameters):
    """Import Nestle table from mysql."""

    if config.get ('MYSQL_NESTLE_TABLE'):
        with dbdest.engine.begin () as dest:
            log (logging.INFO, "  Importing mysql nestle table ...")
            copy_table_fdw (dest, 'original_nestle', 'nestle_fdw', config['MYSQL_NESTLE_TABLE'])
Example 3
File: import.py Project: cceh/ntg
def import_genealogical_fdw (dbsrc, dbdest, parameters):
    """Import genealogical tables from mysql.

    Import the (28 * 3) mysql tables to 3 tables in the postgres database.

    This function is relevant only for Acts, where we had to import
    genealogical data from a previous implementation of the CBGM.  It is not
    used for new projects.

    """

    if not config.get ('MYSQL_VG_DB'):
        return

    dbsrc_meta = sqlalchemy.schema.MetaData (bind = dbsrc.engine)
    dbsrc_meta.reflect ()

    with dbdest.engine.begin () as dest:
        if config.get ('MYSQL_LOCSTEM_TABLES'):
            log (logging.INFO, "  Importing mysql locstem tables ...")
            concat_tables_fdw (dest, dbsrc_meta, 'original_locstemed', 'var_fdw', config['MYSQL_LOCSTEM_TABLES'])

    with dbdest.engine.begin () as dest:
        if config.get ('MYSQL_RDG_TABLES'):
            log (logging.INFO, "  Importing mysql rdg tables ...")
            concat_tables_fdw (dest, dbsrc_meta, 'original_rdg',       'var_fdw', config['MYSQL_RDG_TABLES'])

    with dbdest.engine.begin () as dest:
        if config.get ('MYSQL_VAR_TABLES'):
            log (logging.INFO, "  Importing mysql var tables ...")
            concat_tables_fdw (dest, dbsrc_meta, 'original_var',       'var_fdw', config['MYSQL_VAR_TABLES'])

    with dbdest.engine.begin () as dest:
        if config.get ('MYSQL_MEMO_TABLE'):
            log (logging.INFO, "  Importing mysql memo table ...")
            copy_table_fdw    (dest,             'original_memo',      'var_fdw', config['MYSQL_MEMO_TABLE'])
            execute (dest, """
            ALTER TABLE original_memo RENAME COLUMN anfadr TO begadr;
            """, parameters)
Example 4
                         help="a .conf file (required)")
    return parser


if __name__ == '__main__':
    args, config = init_cmdline (build_parser ())

    parameters = dict ()
    db = db_tools.PostgreSQLEngine (**config)

    tree = lxml.etree.parse (args.input)

    with db.engine.begin () as conn:
        db_tools.truncate_editor_tables (conn)

        log (logging.INFO, "Build default cliques ...")
        db_tools.init_default_cliques (conn)
        log (logging.INFO, "Build default ms_cliques ...")
        db_tools.init_default_ms_cliques (conn)
        log (logging.INFO, "Build default locstem ...")
        db_tools.init_default_locstem (conn)
        # default notes is an empty table

    log (logging.INFO, "Loading cliques ...")

    with db.engine.begin () as conn:
        values = []
        for row in tree.xpath ('/sql/export_cliques/row'):
            values.append ({ e.tag : e.text for e in row })

        execute (conn, """
Example 5
def create_labez_matrix (dba, parameters, val):
    """Create the :attr:`labez matrix <scripts.cceh.cbgm.CBGM_Params.labez_matrix>`."""

    with dba.engine.begin () as conn:

        np.set_printoptions (threshold = 30)

        # get passages
        res = execute (conn, """
        SELECT count (*)
        FROM passages
        """, parameters)
        val.n_passages = res.fetchone ()[0]

        # get matrix of invariant passages
        # Initialize all passages to 'variant'
        variant_matrix = np.ones ((1, val.n_passages), np.bool_)

        res = execute (conn, """
        SELECT pass_id - 1
        FROM passages
        WHERE NOT (variant)
        """, parameters)

        for row in res:
            variant_matrix [0, row[0]] = False
        val.variant_matrix = variant_matrix

        # get no. of manuscripts
        res = execute (conn, """
        SELECT count (*)
        FROM manuscripts
        """, parameters)
        val.n_mss = res.fetchone ()[0]

        # get no. of ranges
        Range = collections.namedtuple ('Range', 'rg_id range start end')
        res = execute (conn, """
        SELECT rg_id, range, MIN (pass_id) - 1 AS first_id, MAX (pass_id) AS last_id
        FROM ranges ch
        JOIN passages p ON ch.passage @> p.passage
        GROUP BY rg_id, range
        ORDER BY lower (ch.passage), upper (ch.passage) DESC
        """, parameters)
        val.n_ranges = res.rowcount
        val.ranges = list (map (Range._make, res))
        log (logging.INFO, '  No. of ranges: ' + str (val.n_ranges))

        # Matrix ms x pass

        # Initialize all manuscripts to the labez 'a'
        labez_matrix  = np.broadcast_to (np.array ([1], np.uint32), (val.n_mss, val.n_passages)).copy ()

        # overwrite matrix where actual labez is not 'a'
        res = execute (conn, """
        SELECT ms_id - 1, pass_id - 1, ord_labez (labez) as labez
        FROM apparatus a
        WHERE labez != 'a' AND cbgm
        """, parameters)

        for row in res:
            labez_matrix [row[0], row[1]] = row[2]

        # clear matrix where reading is uncertain
        res = execute (conn, """
        SELECT DISTINCT ms_id - 1, pass_id - 1
        FROM apparatus
        WHERE certainty != 1.0
        """, parameters)

        for row in res:
            labez_matrix [row[0], row[1]] = 0

        val.labez_matrix = labez_matrix

        # Boolean matrix ms x pass set where passage is defined
        val.def_matrix = np.greater (val.labez_matrix, 0)
        val.def_matrix = np.logical_and (val.def_matrix, val.variant_matrix) # mask invariant passages

        log (logging.INFO, '  Size of the labez matrix: ' + str (val.labez_matrix.shape))
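
To make the encoding concrete, a minimal self-contained sketch with toy data (not from any real database): labez values are stored as small integers, 0 marks lacunae or uncertain readings, and def_matrix masks out invariant passages:

import numpy as np

# toy data: 3 manuscripts x 4 passages
# 0 = lacuna or uncertain reading, 1 = labez 'a', 2 = 'b', 3 = 'c', ...
labez_matrix = np.array ([[1, 1, 2, 0],
                          [1, 3, 2, 1],
                          [0, 1, 1, 1]], np.uint32)

# the last passage is invariant and gets masked out of def_matrix
variant_matrix = np.array ([[True, True, True, False]])

def_matrix = np.logical_and (np.greater (labez_matrix, 0), variant_matrix)
print (def_matrix)
# [[ True  True  True False]
#  [ True  True  True False]
#  [False  True  True False]]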
Example 6
def write_affinity_table (dba, parameters, val):
    """Write back the new affinity (and ms_ranges) tables.

    """

    with dba.engine.begin () as conn:
        # perform sanity tests

        # varid older than ms A
        if val.ancestor_matrix[0,:,0].any ():
            log (logging.ERROR, "Found varid older than A in msids: %s"
                 % (np.nonzero (val.ancestor_matrix[0,:,0])))

        # norel < 0
        norel_matrix = (val.and_matrix - val.eq_matrix - val.ancestor_matrix -
                        np.transpose (val.ancestor_matrix, (0, 2, 1)) - val.unclear_ancestor_matrix)
        if np.less (norel_matrix, 0).any ():
            log (logging.ERROR, "norel < 0 in mss. %s"
                 % (np.nonzero (np.less (norel_matrix, 0))))

        # calculate ranges lengths using numpy
        params = []
        for i in range (0, val.n_mss):
            for range_ in val.ranges:
                length = int (np.sum (val.def_matrix[i, range_.start:range_.end]))
                params.append ( { 'ms_id': i + 1, 'range': range_.rg_id, 'length': length } )

        executemany (conn, """
        UPDATE ms_ranges
        SET length = :length
        WHERE ms_id = :ms_id AND rg_id = :range
        """, parameters, params)

        log (logging.INFO, "  Filling Affinity table ...")

        # execute (conn, "TRUNCATE affinity", parameters) # fast but needs access exclusive lock
        execute (conn, "DELETE FROM affinity", parameters)

        for i, range_ in enumerate (val.ranges):
            values = []
            for j in range (0, val.n_mss):
                for k in range (0, val.n_mss):
                    if j != k:
                        common = int (val.and_matrix[i,j,k])
                        equal  = int (val.eq_matrix[i,j,k])
                        if common > 0:
                            values.append ( (
                                range_.rg_id,
                                j + 1,
                                k + 1,
                                float (equal) / common,
                                common,
                                equal,
                                int (val.ancestor_matrix[i,j,k]),
                                int (val.ancestor_matrix[i,k,j]),
                                int (val.unclear_ancestor_matrix[i,j,k]),
                                int (val.parent_matrix[i,j,k]),
                                int (val.parent_matrix[i,k,j]),
                                int (val.unclear_parent_matrix[i,j,k]),
                            ) )

            # speed gain for using executemany_raw: 65s to 55s :-(
            # probably the bottleneck here is string formatting with %s
            executemany_raw (conn, """
            INSERT INTO affinity (rg_id, ms_id1, ms_id2,
                                  affinity, common, equal,
                                  older, newer, unclear,
                                  p_older, p_newer, p_unclear)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, parameters, values)

        log (logging.DEBUG, "eq:"        + str (val.eq_matrix))
        log (logging.DEBUG, "ancestor:"  + str (val.ancestor_matrix))
        log (logging.DEBUG, "unclear:"   + str (val.unclear_ancestor_matrix))
        log (logging.DEBUG, "and:"       + str (val.and_matrix))
Example 7
    config = config_from_pyfile(args.profile)

    init_logging(
        args,
        logging.StreamHandler(),  # stderr
        logging.FileHandler('load_edits.log'))

    parameters = dict()
    db = db_tools.PostgreSQLEngine(**config)

    tree = lxml.etree.parse(args.input if args.input != '-' else sys.stdin)

    with db.engine.begin() as conn:
        db_tools.truncate_editor_tables(conn)

        log(logging.INFO, "Build default cliques ...")
        db_tools.init_default_cliques(conn)
        log(logging.INFO, "Build default ms_cliques ...")
        db_tools.init_default_ms_cliques(conn)
        log(logging.INFO, "Build default locstem ...")
        db_tools.init_default_locstem(conn)
        # default notes is an empty table

    log(logging.INFO, "Loading cliques ...")

    with db.engine.begin() as conn:
        values = []
        for row in tree.xpath('/sql/export_cliques/row'):
            values.append({e.tag: e.text for e in row})

        execute(conn, """
Example 8
File: cbgm.py Project: cceh/ntg
    parser.add_argument ('profile', metavar='path/to/file.conf',
                         help="a .conf file (required)")
    parser.add_argument ('-v', '--verbose', dest='verbose', action='count',
                         help='increase output verbosity', default=0)
    return parser


if __name__ == '__main__':
    args, config = init_cmdline (build_parser ())

    db = db_tools.PostgreSQLEngine (**config)
    parameters = dict ()
    v = CBGM_Params ()

    log (logging.INFO, "Rebuilding the 'A' text ...")
    build_A_text (db, parameters)

    log (logging.INFO, "Creating the labez matrix ...")
    create_labez_matrix (db, parameters, v)

    log (logging.INFO, "Calculating mss similarity pre-co ...")
    calculate_mss_similarity_preco (db, parameters, v)

    log (logging.INFO, "Calculating mss similarity post-co ...")
    calculate_mss_similarity_postco (db, parameters, v)

    log (logging.INFO, "Writing affinity table ...")
    write_affinity_table (db, parameters, v)

    log (logging.INFO, "Vacuum ...")
Example 9
def calculate_mss_similarity_postco(dba, parameters, val, do_checks=True):
    """Calculate post-coherence mss similarity

    Genealogical coherence outputs asymmetrical matrices.
    Loop over all mss O(n_mss² * n_ranges * n_passages).

    The main idea in this function is to get the DAG (directed acyclic graph)
    into a representation that can be used by numpy.  Numpy gives us a
    tremendous speed boost.

    For every passage and every reading we build

    - a bitmask for the reading and
    - a bitmask for all prior readings of this reading.

    Then for every passage and every manuscript we look up what the manuscript
    offers and store the relative bitmasks in 2 matrices.

    For illustration we refer to this passage (Mc 10:10/16-22 pass_id == 3240):

    .. pic:: dot
       :file: local-stemma-mark-3240.dot
       :align: center

    In a first step every reading (labez and clique) gets assigned a bitmask:

    .. code-block:: none

       labez | clique |                               mask
       ------+--------+-----------------------------------------------------------------
       ?     |        | 0000000000000000000000000000000000000000000000000000000000000001
       a     | 1      | 0000000000000000000000000000000000000000000000000000000000000010
       a     | 2      | 0000000000000000000000000000000000000000000000000000000000000100
       b     | 1      | 0000000000000000000000000000000000000000000000000000000000001000
       c     | 1      | 0000000000000000000000000000000000000000000000000000000000010000
       c     | 2      | 0000000000000000000000000000000000000000000000000000000000100000
       c     | 3      | 0000000000000000000000000000000000000000000000000000000001000000
       d     | 1      | 0000000000000000000000000000000000000000000000000000000010000000
       d     | 2      | 0000000000000000000000000000000000000000000000000000000100000000
       e     | 1      | 0000000000000000000000000000000000000000000000000000001000000000
       f     | 1      | 0000000000000000000000000000000000000000000000000000010000000000
       f     | 2      | 0000000000000000000000000000000000000000000000000000100000000000
       g     | 1      | 0000000000000000000000000000000000000000000000000001000000000000
       h     | 1      | 0000000000000000000000000000000000000000000000000010000000000000
       i     | 1      | 0000000000000000000000000000000000000000000000000100000000000000
       j     | 1      | 0000000000000000000000000000000000000000000000001000000000000000
       k     | 1      | 0000000000000000000000000000000000000000000000010000000000000000
       l     | 1      | 0000000000000000000000000000000000000000000000100000000000000000
       m     | 1      | 0000000000000000000000000000000000000000000001000000000000000000
       n     | 1      | 0000000000000000000000000000000000000000000010000000000000000000
       o     | 1      | 0000000000000000000000000000000000000000000100000000000000000000
       p     | 1      | 0000000000000000000000000000000000000000001000000000000000000000
       q     | 1      | 0000000000000000000000000000000000000000010000000000000000000000
       r     | 1      | 0000000000000000000000000000000000000000100000000000000000000000
       s     | 1      | 0000000000000000000000000000000000000001000000000000000000000000
       t     | 1      | 0000000000000000000000000000000000000010000000000000000000000000
       u     | 1      | 0000000000000000000000000000000000000100000000000000000000000000
       v     | 1      | 0000000000000000000000000000000000001000000000000000000000000000
       v     | 2      | 0000000000000000000000000000000000010000000000000000000000000000
       v     | 3      | 0000000000000000000000000000000000100000000000000000000000000000
       w     | 1      | 0000000000000000000000000000000001000000000000000000000000000000

    Note that we have an extra bitmask for '?'.  This allows quick testing for
    unknown origin.

    In the second step we build the ancestor bitmasks.

    Reading 'f' has prior readings 'c', 'm', and 'a'.  Thus the ancestor bitmask
    for reading 'f' is the bitwise_or of the masks for 'c', 'm', and 'a':

    .. code-block:: none

       labez | clique |                               mask
       ------+--------+-----------------------------------------------------------------
       c     | 1      | 0000000000000000000000000000000000000000000000000000000000010000
       m     | 1      | 0000000000000000000000000000000000000000000001000000000000000000
       a     | 1      | 0000000000000000000000000000000000000000000000000000000000000010

       labez | clique |                            ancestor mask
       ------+--------+-----------------------------------------------------------------
       f     | 1      | 0000000000000000000000000000000000000000000001000000000000010010

    Another example: reading 'w' has the prior reading 'a2', which is of
    unknown origin.  The ancestor mask for 'w' is the bitwise_or of the masks
    for 'a2' and '?':

    .. code-block:: none

       labez | clique |                               mask
       ------+--------+-----------------------------------------------------------------
       a     | 2      | 0000000000000000000000000000000000000000000000000000000000000100
       ?     |        | 0000000000000000000000000000000000000000000000000000000000000001

       labez | clique |                            ancestor mask
       ------+--------+-----------------------------------------------------------------
       w     | 1      | 0000000000000000000000000000000000000000000000000000000000000101

    After building the masks for every reading at every passage we put the masks
    into 2 matrices of dimension (mss x passages), the mask_matrix and the
    ancestor_matrix.  The mask_matrix contains the mask for the reading the
    manuscript offers, the ancestor_matrix contains the ancestor mask for that reading.

    Manuscript 1457 (ms_id == 156) (at pass_id == 3240) reads 'c', so the
    mask_matrix contains:

    .. code-block::

       mask_matrix[156,3240] = b'0000000000000000000000000000000000000000000000000000000000010000'

    Manuscript 706 (ms_id == 102) (at pass_id == 3240) reads 'f', so the
    ancestor_matrix contains:

    .. code-block::

       ancestor_matrix[102,3240] = b'0000000000000000000000000000000000000000000001000000000000010010'

    To test for ancestrality between mss. 1457 and 706 we do a bitwise_and of the
    mask_matrix of 1457 and the ancestor_matrix of 706. If the result is non-zero then
    1457 is ancestral to 706.

    .. code-block::

       is_ancestral = np.bitwise_and (mask_matrix[156,3240], ancestor_matrix[102,3240]) > 0

    But that would be very slow.  Numpy allows us to operate on whole matrix
    rows at a time, so we can calculate the ancestrality for all passages with
    a single call to numpy.

    .. code-block::

       is_ancestral = np.bitwise_and (mask_matrix[156], ancestor_matrix[102]) > 0

    is_ancestral is an array of booleans.  We only have to count how many
    elements of it are True to obtain the number of prior readings.

    Reversing the role of the two manuscripts (mask_matrix and ancestor_matrix)
    gives us the number of posterior readings.

    """

    with dba.engine.begin() as conn:

        # Load all passages into memory

        res = execute(
            conn, """
        SELECT pass_id, begadr, endadr FROM passages
        ORDER BY pass_id
        """, parameters)

        stemmas = dict()
        for pass_id, begadr, endadr in res.fetchall():
            G = db_tools.local_stemma_to_nx(conn, pass_id,
                                            True)  # True == add isolated roots

            if do_checks:
                # sanity tests
                # connect the graph through a root node for the following tests:
                G.add_node('root', label='root')
                G.add_edge('root', '*')
                G.add_edge('root', '?')
                if not nx.is_weakly_connected(G):
                    # use it anyway
                    log(
                        logging.WARNING,
                        "Local Stemma @ %s-%s is not connected (pass_id=%s)." %
                        (begadr, endadr, pass_id))
                if not nx.is_directed_acyclic_graph(G):
                    # don't use these
                    log(
                        logging.ERROR,
                        "Local Stemma @ %s-%s is not a directed acyclic graph (pass_id=%s)."
                        % (begadr, endadr, pass_id))
                    continue
                # ... and remove it again
                G.remove_node('root')

            G.nodes['*']['mask'] = 0
            G.nodes['?']['mask'] = 1  # bitmask == 1 signifies source is unclear

            # build node bitmasks.  Every node gets a different bit set.
            i = 1
            for n in sorted(G.nodes()):
                attrs = G.nodes[n]
                attrs['parents'] = 0
                attrs['ancestors'] = 0
                if 'mask' not in attrs:
                    i += 1
                    if i < 64:
                        attrs['mask'] = (1 << i)
                    else:
                        attrs['mask'] = 0
                        # mask is 64 bit only
                        log(
                            logging.ERROR,
                            "Too many cliques in local stemma @ %s-%s (pass_id=%s)."
                            % (begadr, endadr, pass_id))

            # build the parents bit mask. We set the bits of the parent nodes.
            for n in G:
                mask = G.nodes[n]['mask']
                for succ in G.successors(n):
                    G.nodes[succ]['parents'] |= mask

            # build the ancestors mask.  We set the bits of all node ancestors.
            TC = nx.transitive_closure(G)
            for n in TC:
                # transitive_closure does not copy attributes !
                mask = G.nodes[n]['mask']
                for succ in TC.successors(n):
                    G.nodes[succ]['ancestors'] |= mask

            # save the graph for later
            stemmas[pass_id - 1] = G

        # Matrix mss x passages containing the bitmask of the current reading
        mask_matrix = np.zeros((val.n_mss, val.n_passages), np.uint64)
        # Matrix mss x passages containing the bitmask of the parent readings
        parent_matrix = np.zeros((val.n_mss, val.n_passages), np.uint64)
        # Matrix mss x passages containing the bitmask of the ancestral readings
        ancestor_matrix = np.zeros((val.n_mss, val.n_passages), np.uint64)

        # load ms x pass
        res = execute(
            conn, """
        SELECT pass_id - 1 AS pass_id,
               ms_id   - 1 AS ms_id,
               labez_clique (labez, clique) AS labez_clique
        FROM apparatus_cliques_view a
        WHERE labez !~ '^z[u-z]' AND cbgm
        ORDER BY pass_id
        """, parameters)

        LocStemEd = collections.namedtuple('LocStemEd',
                                           'pass_id ms_id labez_clique')
        rows = list(map(LocStemEd._make, res))

        # If ((current bitmask of ms j) and (ancestor bitmask of ms k) > 0) then
        # ms j is an ancestor of ms k.

        error_count = 0
        for row in rows:
            try:
                attrs = stemmas[row.pass_id].nodes[row.labez_clique]
                mask_matrix[row.ms_id, row.pass_id] = attrs['mask']
                parent_matrix[row.ms_id, row.pass_id] = attrs['parents']
                ancestor_matrix[row.ms_id, row.pass_id] = attrs['ancestors']
            except KeyError:
                error_count += 1
                # print (row.pass_id + 1)
                # print (str (e))

        # Matrix mss x passages, non-zero where the source is unclear (s1 = '?')
        quest_matrix = np.bitwise_and(parent_matrix, 1)  # 1 means source unclear

        if error_count:
            log(
                logging.WARNING,
                "Could not find labez and clique in LocStem in %d cases." %
                error_count)
        log(logging.DEBUG, "mask:\n" + str(mask_matrix))
        log(logging.DEBUG, "parents:\n" + str(parent_matrix))
        log(logging.DEBUG, "ancestors:\n" + str(ancestor_matrix))
        log(logging.DEBUG, "quest:\n" + str(quest_matrix))

        def postco(mask_matrix, anc_matrix):

            local_stemmas_with_loops = set()

            # Matrix range x ms x ms with count of the passages that are older in ms1 than in ms2
            ancestor_matrix = np.zeros((val.n_ranges, val.n_mss, val.n_mss),
                                       dtype=np.uint16)

            # Matrix range x ms x ms with count of the passages whose relationship is unclear in ms1 and ms2
            unclear_matrix = np.zeros((val.n_ranges, val.n_mss, val.n_mss),
                                      dtype=np.uint16)

            for j in range(0, val.n_mss):
                for k in range(0, val.n_mss):
                    # See: VGA/VGActs_allGenTab3Ph3.pl

                    # set bit if the reading of j is ancestral to the reading of k
                    varidj_is_older = np.bitwise_and(mask_matrix[j],
                                                     anc_matrix[k]) > 0
                    varidk_is_older = np.bitwise_and(mask_matrix[k],
                                                     anc_matrix[j]) > 0

                    if j == 0 and k > 0 and varidk_is_older.any():
                        log(
                            logging.ERROR,
                            "Found varid older than A in msid: %d = %s" %
                            (k, np.nonzero(varidk_is_older)))

                    # error check for loops
                    if do_checks:
                        check = np.logical_and(varidj_is_older,
                                               varidk_is_older)
                        if np.any(check):
                            not_check = np.logical_not(check)
                            varidj_is_older = np.logical_and(
                                varidj_is_older, not_check)
                            varidk_is_older = np.logical_and(
                                varidk_is_older, not_check)

                            local_stemmas_with_loops |= set(
                                np.nonzero(check)[0])

                    # if the compared manuscripts differ from each other and
                    # one of them has Q1 = '?', AND NEITHER OF THEM IS THE
                    # SOURCE OF THE OTHER, the relationship is 'UNCLEAR'

                    unclear = np.logical_and(val.def_matrix[j],
                                             val.def_matrix[k])
                    unclear = np.logical_and(
                        unclear,
                        np.not_equal(val.labez_matrix[j], val.labez_matrix[k]))
                    unclear = np.logical_and(
                        unclear, np.logical_or(quest_matrix[j],
                                               quest_matrix[k]))
                    unclear = np.logical_and(
                        unclear,
                        np.logical_not(
                            np.logical_or(varidj_is_older, varidk_is_older)))

                    ancestor_matrix[:, j, k] = count_by_range(
                        varidj_is_older, val.range_starts, val.range_ends)
                    unclear_matrix[:, j, k] = count_by_range(
                        unclear, val.range_starts, val.range_ends)

            if local_stemmas_with_loops:
                log(
                    logging.ERROR, "Found loops in local stemmata: %s" %
                    sorted(local_stemmas_with_loops))

            return ancestor_matrix, unclear_matrix

        val.parent_matrix, val.unclear_parent_matrix = postco(
            mask_matrix, parent_matrix)
        val.ancestor_matrix, val.unclear_ancestor_matrix = postco(
            mask_matrix, ancestor_matrix)
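
For illustration, a minimal numpy sketch of the bitmask test described in the docstring, with hypothetical masks for two manuscripts across three passages:

import numpy as np

# hypothetical masks, two mss across three passages
mask_ms1 = np.array([1 << 4, 1 << 1, 1 << 3], np.uint64)          # reading bits of ms1
anc_ms2 = np.array([1 << 4 | 1 << 1, 1 << 1, 1 << 2], np.uint64)  # ancestor bits of ms2

# ms1 is ancestral to ms2 at a passage iff ms1's reading bit is set
# in ms2's ancestor mask
varid1_is_older = np.bitwise_and(mask_ms1, anc_ms2) > 0
print(varid1_is_older)        # [ True  True False]
print(varid1_is_older.sum())  # 2 passages where ms1's reading is prior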
Example 10
if __name__ == '__main__':

    build_parser().parse_args(namespace=args)
    config = config_from_pyfile(args.profile)

    init_logging(
        args,
        logging.StreamHandler(),  # stderr
        logging.FileHandler('cbgm.log'))

    db = db_tools.PostgreSQLEngine(**config)
    parameters = dict()
    v = CBGM_Params()

    log(logging.INFO, "Rebuilding the 'A' text ...")
    build_A_text(db, parameters)

    log(logging.INFO, "Creating the labez matrix ...")
    create_labez_matrix(db, parameters, v)

    log(logging.INFO, "Calculating mss similarity pre-co ...")
    calculate_mss_similarity_preco(db, parameters, v)

    log(logging.INFO, "Calculating mss similarity post-co ...")
    calculate_mss_similarity_postco(db, parameters, v)

    log(logging.INFO, "Writing affinity table ...")
    write_affinity_table(db, parameters, v)

    log(logging.INFO, "Vacuum ...")
Example 11
def congruence(conn, passage):
    """Check the congruence.

    "Das Prüfprogramm soll eine Inkongruenz anzeigen, wenn der Zeuge einer Lesart
    x, die im lokalen Stemma von y abhängt UND (bei x keinen pV mit Conn <= 5
    hat ODER bei y keinen pV mit höherem Rang hat als ein weiterer pV bei einer
    anderen Variante), nicht mit x ODER x(n) der Quelle "?" zugeordnet wird."
    -- email K. Wachtel 16.01.2020

    Wenn Lesart x im lokalen Stemma von y != ? abhängt, muß jeder Zeuge der
    Lesart x:

    1. einen pV(conn=5) der Lesart x haben, oder

    2. der höchste pV(!= zz) die Lesart y haben.

    Wenn Lesart x im lokalen Stemma von ? abhängt, ist keine Aussage möglich.

    """

    res = execute(
        conn, """
    -- get the closest ancestors ms1 for every manuscript ms2
    WITH ranks AS (
      SELECT
        aff.ms_id1,
        aff.ms_id2,
        ms1.hs as hs1,
        ms2.hs as hs2,
        q1.labez AS labez1,
        q2.labez AS labez2,
        q1.clique AS clique1,
        q2.clique AS clique2,
        labez_clique (q1.labez, q1.clique) as lq1,
        labez_clique (q2.labez, q2.clique) as lq2,
        l.source_labez,
        l.source_clique,
        labez_clique (l.source_labez, l.source_clique) as source_lq,
        rank () OVER (PARTITION BY ms_id2 ORDER BY affinity DESC, common, older, newer DESC, ms_id1) AS rank,
        affinity
      FROM affinity_p_view aff
        JOIN manuscripts ms1 ON ms1.ms_id = aff.ms_id1
        JOIN manuscripts ms2 ON ms2.ms_id = aff.ms_id2
        JOIN apparatus_cliques_view q1 ON q1.ms_id = aff.ms_id1 AND q1.pass_id = :pass_id
        JOIN apparatus_cliques_view q2 ON q2.ms_id = aff.ms_id2 AND q2.pass_id = :pass_id
        JOIN locstem l ON (l.pass_id, l.labez, l.clique) = (q2.pass_id, q2.labez, q2.clique)
      WHERE ms_id1 NOT IN :exclude
        AND ms_id2 NOT IN :exclude
        AND q1.labez != 'zz'
        AND q2.labez != 'zz'
        AND q1.certainty = 1.0
        AND q2.certainty = 1.0
        AND aff.rg_id = :rg_id
        AND aff.newer < aff.older
        AND aff.common > aff.ms2_length / 2
      ORDER BY affinity DESC
    )

    -- output mss that fail both rules
    SELECT hs1, hs2, ms_id1, ms_id2, lq1, lq2, rank
    FROM ranks r
    WHERE lq1 != lq2
      AND r.source_labez != '?'
      AND r.rank <= :connectivity
      AND -- ms2 fails rule 1
        NOT EXISTS (
          SELECT 1 FROM ranks rr
          WHERE rr.ms_id2 = r.ms_id2
            AND rr.lq1    = r.lq2
            AND rr.rank  <= :connectivity
        )
      AND -- ms2 fails rule 2
        NOT EXISTS (
          SELECT * FROM ranks rr
          WHERE rr.ms_id2     = r.ms_id2
            AND (rr.source_lq = r.lq1 OR rr.source_labez = '?')
            AND rr.rank <= 1
        )
    ORDER BY hs2, rank
    """,
        dict(
            rg_id=passage.range_id('All'),
            pass_id=passage.pass_id,
            connectivity=5,
            exclude=(2, ),
        ))

    Ranks = collections.namedtuple('Ranks',
                                   'ms1 ms2 ms_id1 ms_id2 labez1 labez2 rank')
    ranks = list(map(Ranks._make, res))

    tools.log(logging.INFO, 'rg_id: ' + str(passage.range_id('All')))

    return ranks
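
A hypothetical reporting loop over the result (conn and passage as above; field names from the Ranks namedtuple):

# hypothetical usage -- conn and passage as in the function above
for r in congruence(conn, passage):
    print('incongruence: %s reads %s, but its rank-%d potential ancestor %s reads %s'
          % (r.ms2, r.labez2, r.rank, r.ms1, r.labez1))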
Example 12
File: import.py Project: cceh/ntg
def concat_tables_fdw(conn, meta, dest_table, fdw, table_mask):
    """Concatenate multiple tables into one."""

    table_mask = re.compile('^%s$' % table_mask)

    # find the set of fields common to all input tables.  check types also.  it
    # is ridiculous that we have to do this but the table structures are highly
    # inconsistent even between chapters of the same book.
    source_table = None
    column_set = collections.OrderedDict()
    for t in sorted(meta.tables.keys()):
        if table_mask.match(t):
            source_model = sqlalchemy.Table(t, meta, autoload=True)
            if source_table is None:
                source_table = t
                for c in source_model.columns:
                    column_set[c.name] = c.type.python_type
            else:
                col_set = {
                    c.name: c.type.python_type
                    for c in source_model.columns
                }
                for name, type_ in list(column_set.items()):
                    if col_set.get(name, '') != type_:
                        del column_set[name]

    # create a table with those fields common to all input tables, lowercase the
    # field names
    execute(conn, """
    DROP TABLE IF EXISTS {dest_table}
    """, dict(parameters, dest_table=dest_table))

    execute(
        conn, """
    CREATE TABLE {dest_table} ( LIKE {fdw}."{source_table}" )
    """,
        dict(parameters,
             dest_table=dest_table,
             source_table=source_table,
             fdw=fdw))

    source_model = sqlalchemy.Table(source_table, meta, autoload=True)
    cols = [column.name for column in source_model.columns]

    for column in cols:
        if column in column_set:
            if column != column.lower():
                execute(
                    conn,
                    'ALTER TABLE {dest_table} RENAME COLUMN "{source_column}" TO "{dest_column}"',
                    dict(parameters,
                         dest_table=dest_table,
                         source_column=column,
                         dest_column=column.lower()))
        else:
            execute(
                conn, 'ALTER TABLE {dest_table} DROP COLUMN "{source_column}"',
                dict(parameters,
                     dest_table=dest_table,
                     source_column=column,
                     dest_column=column.lower()))

    execute(conn, """COMMIT""", parameters)

    # concat the input tables
    for source_table in sorted(meta.tables.keys()):
        if not table_mask.match(source_table):
            continue
        log(logging.DEBUG, "    Copying table %s" % source_table)

        source_columns = ['"' + column + '"' for column in column_set.keys()]
        dest_columns = [
            '"' + column.lower() + '"' for column in column_set.keys()
        ]

        execute(
            conn, """
        INSERT INTO {dest_table} ({dest_columns})
        SELECT {source_columns}
        FROM {fdw}."{source_table}"
        """,
            dict(parameters,
                 source_table=source_table,
                 dest_table=dest_table,
                 fdw=fdw,
                 source_columns=', '.join(source_columns),
                 dest_columns=', '.join(dest_columns)))
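
A hypothetical call, concatenating all per-chapter att tables of the foreign app_fdw schema into one local table; the regex mask here is made up, the real ones come from config values such as MYSQL_ATT_TABLES (see Example 1):

# hypothetical call -- the regex mask is an assumption for illustration
concat_tables_fdw(conn, dbsrc_meta, 'original_att', 'app_fdw', r'Acts\d\dGVZ')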
Example 13
File: import.py Project: cceh/ntg
if __name__ == '__main__':

    args, config = init_cmdline (build_parser ())

    parameters = dict ()

    dbsrc1 = db_tools.MySQLEngine      (config['MYSQL_CONF'], config['MYSQL_GROUP'], config['MYSQL_ECM_DB'])
    dbsrc2 = db_tools.MySQLEngine      (config['MYSQL_CONF'], config['MYSQL_GROUP'], config['MYSQL_VG_DB'])
    dbsrc3 = db_tools.MySQLEngine      (config['MYSQL_CONF'], config['MYSQL_GROUP'], config['MYSQL_NESTLE_DB'])
    dbdest = db_tools.PostgreSQLEngine (**config)

    db.fdw ('app_fdw',    db.Base.metadata,  dbdest, dbsrc1)
    db.fdw ('var_fdw',    db.Base2.metadata, dbdest, dbsrc2)
    db.fdw ('nestle_fdw', db.Base4.metadata, dbdest, dbsrc3)

    log (logging.INFO, "Creating Database Schema ...")

    db.Base.metadata.drop_all  (dbdest.engine)
    db.Base2.metadata.drop_all (dbdest.engine)
    db.Base4.metadata.drop_all (dbdest.engine)

    db.Base.metadata.create_all  (dbdest.engine)
    db.Base2.metadata.create_all (dbdest.engine)
    db.Base4.metadata.create_all (dbdest.engine)

    log (logging.INFO, "Importing mysql tables ...")

    import_att_fdw (dbsrc1, dbdest, parameters)

    import_genealogical_fdw (dbsrc2, dbdest, parameters)
Example 14
File: editor.py Project: cceh/ntg
def stemma_edit (passage_or_id):
    """Edit a local stemma.

    Called from local-stemma.js (split, merge, move) and textflow.js (move-manuscripts).

    """

    if not flask_login.current_user.has_role ('editor'):
        raise PrivilegeError ('You don\'t have editor privilege.')

    args = request.get_json ()

    action = args.get ('action')

    if action not in ('split', 'merge', 'move', 'move-manuscripts'):
        raise EditError ('Bad request')

    params = { 'original_new' : args.get ('labez_new') == '*' }
    for n in 'labez_old labez_new'.split ():
        params[n] = args.get (n)
        if not RE_VALID_LABEZ.match (params[n]):
            raise EditError ('Bad request')
        if params[n] in ('*', '?'):
            params[n] = None
    for n in 'clique_old clique_new'.split ():
        params[n] = args.get (n)
        if not RE_VALID_CLIQUE.match (params[n]):
            raise EditError ('Bad request')
        if params[n] == '0':
            params[n] = None

    with current_app.config.dba.engine.begin () as conn:
        passage = Passage (conn, passage_or_id)
        params['pass_id'] = passage.pass_id
        params['user_id'] = flask_login.current_user.id

        res = execute (conn, """
        SET LOCAL ntg.user_id = :user_id;
        """, dict (parameters, **params))

        if action == 'move':
            try:
                res = execute (conn, """
                UPDATE locstem
                SET source_labez = :labez_new, source_clique = :clique_new, original = :original_new
                WHERE pass_id = :pass_id AND labez = :labez_old AND clique = :clique_old
                """, dict (parameters, **params))
            except sqlalchemy.exc.IntegrityError as e:
                if 'unique constraint' in str (e):
                    raise EditError (
                        '''Only one original reading allowed. If you want to change the original
                        reading, first remove the old original reading.<br/><br/>''' + str (e)
                    )
                raise EditError (str (e))
            except sqlalchemy.exc.DatabaseError as e:
                raise EditError (str (e))

            # test the still uncommitted changes

            graph = db_tools.local_stemma_to_nx (conn, passage.pass_id)

            # test: not a DAG
            if not nx.is_directed_acyclic_graph (graph):
                raise EditError ('The graph is not a DAG anymore.')
            # test: not connected
            graph.add_edge ('*', '?')
            if not nx.is_weakly_connected (graph):
                raise EditError ('The graph is not connected anymore.')
            # test: x derived from x
            for e in graph.edges:
                m0 = RE_EXTRACT_LABEZ.match (e[0])
                m1 = RE_EXTRACT_LABEZ.match (e[1])
                if m0 and m1 and m0.group (1) == m1.group (1):
                    raise EditError (
                        '''A reading cannot be derived from the same reading.
                        If you want to <b>merge</b> instead, use shift + drag.'''
                    )
        elif action == 'split':
            # get the next free clique
            res = execute (conn, """
            SELECT max (clique)
            FROM  cliques
            WHERE pass_id = :pass_id AND labez = :labez_old
            """, dict (parameters, **params))
            params['clique_next'] = str (int (res.fetchone ()[0]) + 1)

            # insert into cliques table
            res = execute (conn, """
            INSERT INTO cliques (pass_id, labez, clique)
            VALUES (:pass_id, :labez_old, :clique_next)
            """, dict (parameters, **params))

            # insert into locstem table with source = '?'
            res = execute (conn, """
            INSERT INTO locstem (pass_id, labez, clique, source_labez, source_clique, original)
            VALUES (:pass_id, :labez_old, :clique_next, NULL, NULL, false)
            """, dict (parameters, **params))

        elif action == 'merge':
            # reassign manuscripts to merged clique
            res = execute (conn, """
            UPDATE ms_cliques
            SET clique = :clique_new
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict (parameters, **params))

            # reassign sources to merged clique
            res = execute (conn, """
            UPDATE locstem
            SET source_clique = :clique_new
            WHERE (pass_id, source_labez, source_clique) = (:pass_id, :labez_old, :clique_old)
            """, dict (parameters, **params))

            # remove clique from locstem
            res = execute (conn, """
            DELETE FROM locstem
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict (parameters, **params))

            # remove clique from cliques
            res = execute (conn, """
            DELETE FROM cliques
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict (parameters, **params))

        elif action == 'move-manuscripts':
            ms_ids = set (args.get ('ms_ids') or [])

            # reassign manuscripts to new clique
            res = execute (conn, """
            UPDATE apparatus_cliques_view
            SET clique = :clique_new
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
              AND ms_id IN :ms_ids
            """, dict (parameters, ms_ids = tuple (ms_ids), **params))

            tools.log (logging.INFO, 'Moved ms_ids: ' + str (ms_ids))

        # return the changed passage
        passage = Passage (conn, passage_or_id)
        return make_json_response (passage.to_json ())

    raise EditError ('Could not edit local stemma.')
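
For reference, a hypothetical JSON body for the 'move' action, written as a Python dict; the handler above reads exactly these fields via args.get ():

# hypothetical request payload; the labez/clique values are made up
payload = {
    'action'     : 'move',
    'labez_old'  : 'c',
    'clique_old' : '1',
    'labez_new'  : 'b',
    'clique_new' : '1',
}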
Example 15
                         help='increase output verbosity', default=0)
    parser.add_argument ('-o', '--output', metavar='path/to/output.xml',
                         help="the output file (required)", required=True)
    parser.add_argument ('profile', metavar='path/to/file.conf',
                         help="a .conf file (required)")
    return parser


if __name__ == '__main__':
    args, config = init_cmdline (build_parser ())

    book = config['BOOK']
    parameters = dict ()
    db = db_tools.PostgreSQLEngine (**config)

    log (logging.INFO, "Saving changes ...")

    with open (args.output, 'w', encoding='utf-8') as fp:
        with db.engine.begin () as conn:
            fp.write ('<?xml version="1.0" encoding="utf-8" ?>\n\n')

            fp.write ('<sql profile="%s">\n' % args.profile)

            res = execute (conn, """
            SELECT (table_to_xml ('export_cliques', true, false, ''))
            """, parameters)

            fp.write (res.fetchone ()[0])
            fp.write ('\n')

            res = execute (conn, """
Example 16
def stemma_edit(passage_or_id):
    """Edit a local stemma.

    Called from local-stemma.js (split, merge, move) and textflow.js (move-manuscripts).

    """

    edit_auth()

    args = request.get_json()

    action = args.get('action')

    if action not in ('add', 'del', 'split', 'merge', 'move',
                      'move-manuscripts'):
        raise EditError('Bad request')

    params = {}
    for n in 'labez_old labez_new source_labez'.split():
        if n in args:
            params[n] = args.get(n)
            if not RE_VALID_LABEZ.match(params[n]):
                raise EditError('Bad request')
    for n in 'clique_old clique_new source_clique'.split():
        if n in args:
            params[n] = args.get(n)
            if not RE_VALID_CLIQUE.match(params[n]):
                raise EditError('Bad request')

    def integrity_error(e):
        if 'ix_locstem_unique_original' in str(e):
            raise EditError(
                '''Only one original reading allowed. If you want to change the original
                reading, first remove the old original reading.<br/><br/>''' +
                str(e))
        if 'locstem_pkey' in str(e):
            raise EditError(
                '''This reading already depends on that reading.<br/><br/>'''
                + str(e))
        if 'same_source' in str(e):
            raise EditError(
                '''A reading cannot be derived from the same reading.
                If you want to <b>merge two readings</b>, use shift + drag.''')
        raise EditError(str(e))

    with current_app.config.dba.engine.begin() as conn:
        passage = Passage(conn, passage_or_id)
        params['pass_id'] = passage.pass_id
        params['user_id'] = flask_login.current_user.id

        res = execute(
            conn, """
        SET LOCAL ntg.user_id = :user_id;
        """, dict(parameters, **params))

        if action == 'move':
            # reassign a source reading
            # there may be multiple existing assignments; only one will remain
            try:
                res = execute(
                    conn, """
                DELETE FROM locstem
                WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old);
                INSERT INTO locstem (pass_id, labez, clique, source_labez, source_clique)
                VALUES (:pass_id, :labez_old, :clique_old, :labez_new, :clique_new)
                """, dict(parameters, **params))
            except sqlalchemy.exc.IntegrityError as e:
                integrity_error(e)
            except sqlalchemy.exc.DatabaseError as e:
                raise EditError(str(e))

        if action == 'del':
            # remove a source reading
            try:
                # check if we are asked to remove the only link,
                # in that case reassign to 'unknown'
                res = execute(
                    conn, """
                SELECT pass_id
                FROM locstem
                WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old);
                """, dict(parameters, **params))

                tools.log(logging.INFO, 'Deleting: ' + str(params))

                if res.rowcount > 1:
                    res = execute(
                        conn, """
                    DELETE FROM locstem
                    WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
                      AND (source_labez, source_clique) = (:source_labez, :source_clique)
                    """, dict(parameters, **params))
                else:
                    res = execute(
                        conn, """
                    UPDATE locstem
                    SET (source_labez, source_clique) = ('?', '1')
                    WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old);
                    """, dict(parameters, **params))
            except sqlalchemy.exc.IntegrityError as e:
                integrity_error(e)
            except sqlalchemy.exc.DatabaseError as e:
                raise EditError(str(e))

        if action == 'add':
            # add a source reading
            try:
                res = execute(
                    conn, """
                INSERT INTO locstem (pass_id, labez, clique, source_labez, source_clique)
                VALUES (:pass_id, :labez_old, :clique_old, :labez_new, :clique_new)
                """, dict(parameters, **params))
            except sqlalchemy.exc.IntegrityError as e:
                integrity_error(e)
            except sqlalchemy.exc.DatabaseError as e:
                raise EditError(str(e))

        if action in ('add', 'del', 'move'):
            # test the still uncommitted changes

            graph = db_tools.local_stemma_to_nx(conn, passage.pass_id)

            # test: not a DAG
            if not nx.is_directed_acyclic_graph(graph):
                raise EditError('The new graph contains cycles.')
            # test: not connected
            graph.add_edge('*', '?')
            if not nx.is_weakly_connected(graph):
                raise EditError('The new graph is not connected.')

        elif action == 'split':
            # Get the lowest free integer for the new clique. See: #122
            res = execute(
                conn, """
            SELECT clique
            FROM  cliques
            WHERE pass_id = :pass_id AND labez = :labez_old
            """, dict(parameters, **params))

            taken = {int(r[0]) for r in res}
            n = 1
            while n in taken:
                n += 1
            params['clique_next'] = str(n)

            # insert into cliques table
            res = execute(
                conn, """
            INSERT INTO cliques (pass_id, labez, clique)
            VALUES (:pass_id, :labez_old, :clique_next)
            """, dict(parameters, **params))

            # insert into locstem table with source = '?'
            res = execute(
                conn, """
            INSERT INTO locstem (pass_id, labez, clique, source_labez, source_clique)
            VALUES (:pass_id, :labez_old, :clique_next, '?', '1')
            """, dict(parameters, **params))

        elif action == 'merge':
            # merge two cliques (eg. b1, b2) into one clique (eg. b1)
            #
            # reassign manuscripts to merged clique
            res = execute(
                conn, """
            UPDATE ms_cliques
            SET clique = :clique_new
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict(parameters, **params))

            # reassign sources to merged clique
            res = execute(
                conn, """
            UPDATE locstem
            SET source_clique = :clique_new
            WHERE (pass_id, source_labez, source_clique) = (:pass_id, :labez_old, :clique_old)
            """, dict(parameters, **params))

            # remove clique from locstem
            res = execute(
                conn, """
            DELETE FROM locstem
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict(parameters, **params))

            # remove clique from cliques
            res = execute(
                conn, """
            DELETE FROM cliques
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
            """, dict(parameters, **params))

        elif action == 'move-manuscripts':
            # reassign a set of manuscripts to a new clique
            ms_ids = set(args.get('ms_ids') or [])

            res = execute(
                conn, """
            UPDATE apparatus_cliques_view
            SET clique = :clique_new
            WHERE (pass_id, labez, clique) = (:pass_id, :labez_old, :clique_old)
              AND ms_id IN :ms_ids
            """, dict(parameters, ms_ids=tuple(ms_ids), **params))

            tools.log(logging.INFO, 'Moved ms_ids: ' + str(ms_ids))

        # return the changed passage
        passage = Passage(conn, passage_or_id)
        return make_json_response(passage.to_json())

    raise EditError('Could not edit local stemma.')
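
The cycle and connectivity tests above run on an in-memory networkx graph of the local stemma. A minimal, self-contained sketch of the same two checks, using hypothetical labez/clique node names in place of the graph returned by db_tools.local_stemma_to_nx:

# Sketch of the validation performed after 'add', 'del' and 'move'.
# The nodes and edges are hypothetical stand-ins.
import networkx as nx

graph = nx.DiGraph()
graph.add_edge('a1', 'b1')  # reading b1 is derived from a1
graph.add_edge('*', 'a1')   # '*' marks the original reading
graph.add_node('?')         # '?' collects readings of unknown origin

# test: the local stemma must be a DAG
assert nx.is_directed_acyclic_graph(graph), 'The new graph contains cycles.'

# test: joined through an artificial '*' -> '?' edge, it must be connected
graph.add_edge('*', '?')
assert nx.is_weakly_connected(graph), 'The new graph is not connected.'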
Example n. 22
0
if __name__ == '__main__':

    build_parser ().parse_args (namespace = args)
    config = config_from_pyfile (args.profile)

    init_logging (
        args,
        logging.StreamHandler (), # stderr
        logging.FileHandler ('save_edits.log')
    )

    book = config['BOOK']
    parameters = dict ()
    db = db_tools.PostgreSQLEngine (**config)

    log (logging.INFO, "Saving changes ...")

    if args.output == '-':
        fp = sys.stdout
    else:
        fp = open (args.output, 'w', encoding='utf-8')

    with db.engine.begin () as conn:
        fp.write ('<?xml version="1.0" encoding="utf-8" ?>\n\n')

        fp.write ('<sql profile="%s">\n' % args.profile)

        res = execute (conn, """
        SELECT (table_to_xml ('export_cliques', true, false, ''))
        """, parameters)
Example n. 23
0
def calculate_mss_similarity_postco (dba, parameters, val, do_checks = True):
    """Calculate post-coherence mss similarity

    Genealogical coherence yields asymmetrical matrices.
    Loops over all pairs of mss: O(n_mss² * n_ranges * n_passages).

    """

    with dba.engine.begin () as conn:

        # Load all passages into memory

        res = execute (conn, """
        SELECT pass_id, begadr, endadr FROM passages
        ORDER BY pass_id
        """, parameters)

        stemmas = dict ()
        for pass_id, begadr, endadr in res.fetchall ():
            G = db_tools.local_stemma_to_nx (conn, pass_id, True) # True == add isolated roots

            if do_checks:
                # sanity tests
                # connect the graph through a root node for the following tests:
                G.add_node ('root', label = 'root')
                G.add_edge ('root', '*')
                G.add_edge ('root', '?')
                if not nx.is_weakly_connected (G):
                    # use it anyway
                    log (logging.WARNING, "Local Stemma @ %s-%s is not connected (pass_id=%s)." %
                         (begadr, endadr, pass_id))
                if not nx.is_directed_acyclic_graph (G):
                    # don't use these
                    log (logging.ERROR, "Local Stemma @ %s-%s is not a directed acyclic graph (pass_id=%s)." %
                         (begadr, endadr, pass_id))
                    continue
                # ... and remove it again
                G.remove_node ('root')

            G.nodes['*']['mask'] = 0
            G.nodes['?']['mask'] = 1 # bitmask == 1 signifies source is unclear

            # build node bitmasks.  Every node gets a different bit set.
            i = 1
            for n in sorted (G.nodes ()):
                attrs = G.nodes[n]
                attrs['parents'] = 0
                attrs['ancestors'] = 0
                if 'mask' not in attrs:
                    i += 1
                    if i < 64:
                        attrs['mask'] = (1 << i)
                    else:
                        attrs['mask'] = 0
                        # mask is 64 bit only
                        log (logging.ERROR, "Too many cliques in local stemma @ %s-%s (pass_id=%s)." %
                             (begadr, endadr, pass_id))

            # build the parents bit mask. We set the bits of the parent nodes.
            for n in G:
                mask = G.nodes[n]['mask']
                for succ in G.successors (n):
                    G.nodes[succ]['parents'] |= mask

            # build the ancestors mask.  We set the bits of all node ancestors.
            TC = nx.transitive_closure (G)
            for n in TC:
                # transitive_closure does not copy attributes !
                mask = G.nodes[n]['mask']
                for succ in TC.successors (n):
                    G.nodes[succ]['ancestors'] |= mask

            # save the graph for later
            stemmas[pass_id - 1] = G

        # Matrix mss x passages containing the bitmask of the current reading
        mask_matrix     = np.zeros ((val.n_mss, val.n_passages), np.uint64)
        # Matrix mss x passages containing the bitmask of the parent readings
        parent_matrix   = np.zeros ((val.n_mss, val.n_passages), np.uint64)
        # Matrix mss x passages containing the bitmask of the ancestral readings
        ancestor_matrix = np.zeros ((val.n_mss, val.n_passages), np.uint64)

        # load ms x pass
        res = execute (conn, """
        SELECT pass_id - 1 AS pass_id,
               ms_id   - 1 AS ms_id,
               labez_clique (labez, clique) AS labez_clique
        FROM apparatus_cliques_view a
        WHERE labez !~ '^z[u-z]' AND cbgm
        ORDER BY pass_id
        """, parameters)

        LocStemEd = collections.namedtuple ('LocStemEd', 'pass_id ms_id labez_clique')
        rows = list (map (LocStemEd._make, res))

        # If ((current bitmask of ms j) and (ancestor bitmask of ms k) > 0) then
        # ms j is an ancestor of ms k.

        error_count = 0
        for row in rows:
            try:
                attrs = stemmas[row.pass_id].nodes[row.labez_clique]
                mask_matrix     [row.ms_id, row.pass_id] = attrs['mask']
                parent_matrix   [row.ms_id, row.pass_id] = attrs['parents']
                ancestor_matrix [row.ms_id, row.pass_id] = attrs['ancestors']
            except KeyError as e:
                # either the stemma was skipped above (cyclic) or the
                # labez/clique is missing from it
                error_count += 1
                # print (row.pass_id + 1)
                # print (str (e))

        # Matrix mss x passages containing True if source is unclear (s1 = '?')
        quest_matrix = np.bitwise_and (parent_matrix, 1)  # 1 means source unclear

        if error_count:
            log (logging.WARNING, "Could not find labez and clique in LocStem in %d cases." % error_count)
        log (logging.DEBUG, "mask:\n"      + str (mask_matrix))
        log (logging.DEBUG, "parents:\n"   + str (parent_matrix))
        log (logging.DEBUG, "ancestors:\n" + str (ancestor_matrix))
        log (logging.DEBUG, "quest:\n"     + str (quest_matrix))

        def postco (mask_matrix, anc_matrix):

            local_stemmas_with_loops = set ()

            # Matrix range x ms x ms with count of the passages where the reading
            # of ms1 is ancestral to the reading of ms2
            ancestor_matrix = np.zeros ((val.n_ranges, val.n_mss, val.n_mss), dtype = np.uint16)

            # Matrix range x ms x ms with count of the passages where the relationship
            # between ms1 and ms2 is unclear
            unclear_matrix  = np.zeros ((val.n_ranges, val.n_mss, val.n_mss), dtype = np.uint16)

            for j in range (0, val.n_mss):
                for k in range (0, val.n_mss):
                    # See: VGA/VGActs_allGenTab3Ph3.pl

                    # set bit if the reading of j is ancestral to the reading of k
                    varidj_is_older = np.bitwise_and (mask_matrix[j], anc_matrix[k]) > 0
                    varidk_is_older = np.bitwise_and (mask_matrix[k], anc_matrix[j]) > 0

                    if j == 0 and k > 0 and varidk_is_older.any ():
                        log (logging.ERROR, "Found varid older than A in msid: %d = %s"
                             % (k, np.nonzero (varidk_is_older)))

                    # error check for loops
                    if do_checks:
                        check = np.logical_and (varidj_is_older, varidk_is_older)
                        if np.any (check):
                            not_check       = np.logical_not (check)
                            varidj_is_older = np.logical_and (varidj_is_older, not_check)
                            varidk_is_older = np.logical_and (varidk_is_older, not_check)

                            local_stemmas_with_loops |= set (np.nonzero (check)[0])

                    # if the compared mss differ and one of them has
                    # Q1 = '?', AND NEITHER OF THEM IS THE SOURCE OF THE
                    # OTHER, the relationship is 'UNCLEAR'

                    unclear = np.logical_and (val.def_matrix[j], val.def_matrix[k])
                    unclear = np.logical_and (unclear, np.not_equal (val.labez_matrix[j], val.labez_matrix[k]))
                    unclear = np.logical_and (unclear, np.logical_or (quest_matrix[j], quest_matrix[k]))
                    unclear = np.logical_and (unclear, np.logical_not (np.logical_or (varidj_is_older, varidk_is_older)))

                    ancestor_matrix[:,j,k] = count_by_range (varidj_is_older, val.range_starts, val.range_ends)
                    unclear_matrix[:,j,k]  = count_by_range (unclear, val.range_starts, val.range_ends)

            if local_stemmas_with_loops:
                log (logging.ERROR, "Found loops in local stemmata: %s" % sorted (local_stemmas_with_loops))

            return ancestor_matrix, unclear_matrix

        val.parent_matrix,   val.unclear_parent_matrix   = postco (mask_matrix, parent_matrix)
        val.ancestor_matrix, val.unclear_ancestor_matrix = postco (mask_matrix, ancestor_matrix)
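
The pair comparison in postco () reduces "is the reading of ms j ancestral to the reading of ms k at this passage?" to one vectorized bitwise AND over the per-passage bitmasks. A minimal sketch with hypothetical masks for three passages:

# Sketch of the bitmask ancestry test; the masks are made up and would
# normally be filled from the per-passage local stemmata.
import numpy as np

# bit of the reading ms j attests at each of 3 passages
mask_ms_j = np.array ([1 << 2, 1 << 3, 1 << 2], dtype = np.uint64)
# bits of all readings ancestral to the reading of ms k
anc_ms_k  = np.array ([1 << 2, 1 << 4, 0], dtype = np.uint64)

# j is older at a passage iff its reading is among k's ancestors there
varidj_is_older = np.bitwise_and (mask_ms_j, anc_ms_k) > 0
print (varidj_is_older)         # [ True False False]
print (varidj_is_older.sum ())  # 1 passage where j is ancestral to k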
Example n. 24
0
    init_logging (
        args,
        logging.StreamHandler (), # stderr
        logging.FileHandler ('load_edits.log')
    )

    parameters = dict ()
    db = db_tools.PostgreSQLEngine (**config)

    tree = lxml.etree.parse (args.input if args.input != '-' else sys.stdin)

    with db.engine.begin () as conn:
        db_tools.truncate_editor_tables (conn)

    log (logging.INFO, "Loading cliques ...")

    with db.engine.begin () as conn:
        values = []
        for row in tree.xpath ('/sql/export_cliques/row'):
            values.append ({ e.tag : e.text for e in row })

        execute (conn, """
        TRUNCATE import_cliques;
        """, parameters)

        executemany (conn, """
        INSERT INTO import_cliques (passage, labez, clique,
                                    sys_period, user_id_start, user_id_stop)
        VALUES (:passage, :labez, :clique,
                :sys_period, :user_id_start, :user_id_stop)
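
The dict comprehension above flattens each <row> element of the XML export into a plain mapping of column name to value, ready for executemany. A self-contained sketch with a small hypothetical document standing in for the real export:

# Sketch of the row-to-dict conversion, on a made-up two-row export.
import lxml.etree

xml = b'''<sql>
  <export_cliques>
    <row><passage>52831006-52831010</passage><labez>a</labez><clique>1</clique></row>
    <row><passage>52831006-52831010</passage><labez>b</labez><clique>1</clique></row>
  </export_cliques>
</sql>'''

root = lxml.etree.fromstring (xml)
values = [{e.tag: e.text for e in row}
          for row in root.xpath ('/sql/export_cliques/row')]
print (values)  # [{'passage': '52831006-52831010', 'labez': 'a', ...}, ...]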
Example n. 25
0
    parameters = dict()

    dbsrc1 = db_tools.MySQLEngine(config['MYSQL_CONF'], config['MYSQL_GROUP'],
                                  config['MYSQL_ECM_DB'])
    dbsrc2 = db_tools.MySQLEngine(config['MYSQL_CONF'], config['MYSQL_GROUP'],
                                  config['MYSQL_VG_DB'])
    dbsrc3 = db_tools.MySQLEngine(config['MYSQL_CONF'], config['MYSQL_GROUP'],
                                  config['MYSQL_NESTLE_DB'])
    dbdest = db_tools.PostgreSQLEngine(**config)

    db.fdw('app_fdw', db.Base.metadata, dbdest, dbsrc1)
    db.fdw('var_fdw', db.Base2.metadata, dbdest, dbsrc2)
    db.fdw('nestle_fdw', db.Base4.metadata, dbdest, dbsrc3)

    log(logging.INFO, "Creating Database Schema ...")

    db.Base.metadata.drop_all(dbdest.engine)
    db.Base2.metadata.drop_all(dbdest.engine)
    db.Base4.metadata.drop_all(dbdest.engine)

    db.Base.metadata.create_all(dbdest.engine)
    db.Base2.metadata.create_all(dbdest.engine)
    db.Base4.metadata.create_all(dbdest.engine)

    log(logging.INFO, "Importing mysql tables ...")

    import_att_fdw(dbsrc1, dbdest, parameters)

    import_genealogical_fdw(dbsrc2, dbdest, parameters)
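
The schema rebuild above uses SQLAlchemy's standard metadata API: drop_all() removes every table known to the metadata, create_all() recreates them. A minimal sketch against a throwaway in-memory SQLite engine, with a hypothetical table standing in for the project's declarative bases:

# Sketch of the drop_all/create_all cycle on a disposable engine.
import sqlalchemy

metadata = sqlalchemy.MetaData()
passages = sqlalchemy.Table(
    'passages', metadata,
    sqlalchemy.Column('pass_id', sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column('begadr', sqlalchemy.Integer),
    sqlalchemy.Column('endadr', sqlalchemy.Integer),
)

engine = sqlalchemy.create_engine('sqlite://')
metadata.drop_all(engine)   # no-op on a fresh database
metadata.create_all(engine)
print(sqlalchemy.inspect(engine).get_table_names())  # ['passages']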