Example #1
0
    def _select(self, fields_=()):
        """Build the SELECT field list as a composed SQL fragment.

        Arguments:
            fields_: `(("field_alias", "SUM(field_definition)"), ...)`
        """
        defaults = (
            ("id", "et.id"),
            ("name", "et.name"),
            ("events_available_count", "COUNT(ee.*)"),
            (
                "event_seats_availability",
                """CASE WHEN 'unlimited' = ANY(ARRAY_AGG(seats_availability))
                   THEN 'unlimited' ELSE 'limited' END""",
            ),
            ("seats_limited_available",
             "COALESCE(SUM(ee.seats_available), 0)"),
            ("open_opportunities_count", "et.open_opportunities_count"),
            ("seats_wanted", "et.seats_wanted_sum"),
        )
        # Caller-supplied fields come first, followed by the defaults.
        parts = [
            sql.SQL(definition + " AS ") + sql.Identifier(alias)
            for alias, definition in fields_ + defaults
        ]
        return sql.Composed(parts).join(", ")
Example #2
0
    def upsert_weather_data(self, table_name: str = 'weather', **kwargs):
        """Insert or update a weather row, keyed on (date, gid).

        Columns belonging to the composite key are never rewritten on
        conflict; every other supplied column is.
        """
        composite_key = ['date', 'countrycode', 'gid']

        self.check_if_gid_exists(kwargs)

        columns = list(kwargs.keys())
        # "col" = %(col)s assignments for the non-key columns.
        assignments = [
            sql.Composed(
                [sql.Identifier(col), sql.SQL("="), sql.Placeholder(col)])
            for col in columns if col not in composite_key
        ]
        sql_query = sql.SQL(
            """INSERT INTO {table_name} ({insert_keys}) VALUES ({insert_data})
                                ON CONFLICT
                                    (date, gid)
                                DO
                                    UPDATE SET {update_data}
                               RETURNING *
                                    """).format(
                table_name=sql.Identifier(table_name),
                insert_keys=sql.SQL(",").join(
                    sql.Identifier(c) for c in columns),
                insert_data=sql.SQL(",").join(
                    sql.Placeholder(c) for c in columns),
                update_data=sql.SQL(",").join(assignments))

        self.execute(sql_query, kwargs)
        logger.debug("Updating {} table with data: {}".format(
            table_name, list(kwargs.values())))
Example #3
0
    def run(self):
        """Recreate the output table as the input rows matching the filter."""
        logging.debug(
            f' Executing RowFilter filter with params: {self.params}')

        out_table = sql.Identifier(self.get_outputs()[0]['db_table'])
        in_table = sql.Identifier(self.get_inputs()[0]['db_table'])
        # NOTE(review): where_clause is spliced in as raw SQL, so it is
        # assumed to come from a trusted source — confirm upstream.
        query = sql.Composed([
            sql.SQL("DROP TABLE IF EXISTS "),
            out_table,
            sql.SQL("; CREATE TABLE "),
            out_table,
            sql.SQL(" AS ( SELECT * FROM "),
            in_table,
            sql.SQL(" WHERE "),
            sql.SQL(self.params["where_clause"]),
            sql.SQL(" );")
        ])

        db = gdtcdb.db_factory(self.get_outputs()[0])
        str_query = query.as_string(db.get_connection())
        logging.debug(f' SQL to execute: {str_query}')

        try:
            db.execute_query(str_query)
        except psycopg2.Error as e:
            msg = f' Error executing query: {e}'
            logging.error(msg)
            raise RuntimeError(msg)  # If we fail, run must end with an error
        finally:
            db.close_connection()
Example #4
0
    def run(self):
        """Recreate the output raster table as the input raster clipped by a
        geometry taken from the first input table."""
        out_table = sql.Identifier(self.get_outputs()[0]['db_table'])
        geom_table = sql.Identifier(self.get_inputs()[0]['db_table'])
        raster_table = sql.Identifier(self.get_inputs()[1]['db_table'])
        # NOTE(review): ogc_fid and rid are spliced in as raw SQL, so they are
        # assumed to come from a trusted source — confirm upstream.
        query = sql.Composed([
            sql.SQL("DROP TABLE IF EXISTS "),
            out_table,
            sql.SQL("; "),
            sql.SQL("CREATE TABLE "),
            out_table,
            sql.SQL(" (rast raster); INSERT INTO "),
            out_table,
            sql.SQL("(rast) VALUES (( SELECT ST_Clip (rast, (SELECT "),
            sql.Identifier(self.params["geom"]),
            sql.SQL(" FROM "),
            geom_table,
            sql.SQL(" WHERE ogc_fid = "),
            sql.SQL(self.params["ogc_fid"]),
            sql.SQL("), true) FROM "),
            raster_table,
            sql.SQL(" WHERE rid = "),
            sql.SQL(self.params["rid"]),
            sql.SQL(" ));")
        ])

        db = gdtcdb.db_factory(self.get_outputs()[0])
        str_query = query.as_string(db.get_connection())
        logging.debug(f' SQL to execute: {str_query}')

        try:
            db.execute_query(str_query)
        except psycopg2.Error as e:
            msg = f' Error executing query: {e}'
            logging.error(msg)
            raise RuntimeError(msg)  # If we fail, run must end with an error
        finally:
            db.close_connection()
Example #5
0
    def run(self, cursor, limit=None):
        """Runs the query.

        Args:
            cursor: psycopg.cursor instance.
            limit: Integer maximum number of matches. If None (the default), no
                limit is set.

        Returns:
            Dict mapping reaction IDs to serialized Reaction protos.
        """
        base_query = sql.SQL("""
            SELECT DISTINCT reaction_id, serialized
            FROM reactions
            INNER JOIN rdk.reactions USING (reaction_id)
            WHERE rdk.reactions.r@>reaction_from_smarts(%s)""")
        components = [base_query]
        args = [self._reaction_smarts]
        if limit:
            components.append(sql.SQL(' LIMIT %s'))
            args.append(limit)
        query = sql.Composed(components).join('')
        # Log the fully-interpolated command for debugging.
        logging.info('Running SQL command:%s',
                     cursor.mogrify(query, args).decode())
        cursor.execute(query, args)
        return fetch_results(cursor)
Example #6
0
    def update(self, table, columns, values, where = None, returning = None):
        """UPDATE `columns` to raw SQL `values`, optionally filtered by ANDed
        `where` conditions and optionally RETURNING one column."""
        assignments = sql.SQL(', ').join(
            sql.SQL("{} = {}").format(sql.Identifier(col), sql.SQL(val))
            for col, val in zip(columns, values))
        q = sql.SQL("UPDATE {} SET {}").format(
            self.__table_name__(table), assignments)
        if where:
            q = sql.Composed([q, sql.SQL(" WHERE {}").format(
                sql.SQL(' AND ').join(map(sql.SQL, where)))])

        if not returning:
            self.execute(q)
            return
        q = sql.Composed([q, sql.SQL(" RETURNING {}").format(
            sql.Identifier(returning))])
        return self.exec_and_fetch(q)
    def get_similarity_fields_sqls(matching_methods):
        """Collect the similarity record-field specs and the lateral joins
        needed to expose them, for methods that define similarity SQL."""
        lateral_joins_sqls = []
        similarity_fields_sqls = []

        for match_method in matching_methods:
            if not match_method.similarity_sql:
                continue

            # List matches store scores as jsonb, others as numeric arrays.
            field_type = (sql.SQL(' jsonb') if match_method.is_list_match
                          else sql.SQL(' numeric[]'))
            similarity_fields_sqls.append(sql.Composed(
                [sql.Identifier(match_method.field_name), field_type]))

            if match_method.is_list_match:
                lateral_joins_sqls.append(sql.SQL(
                    'CROSS JOIN LATERAL jsonb_to_record(sim.{}) AS {}'
                    '(scores numeric[], size integer)').format(
                        sql.Identifier(match_method.field_name),
                        sql.Identifier('sim_' + match_method.field_name),
                    ))

        if similarity_fields_sqls:
            # The base join exposing all similarity fields must come first.
            lateral_joins_sqls.insert(
                0,
                sql.SQL(
                    'CROSS JOIN LATERAL jsonb_to_record(similarities) AS sim({})'
                ).format(sql.SQL(', ').join(similarity_fields_sqls)))

        return lateral_joins_sqls
Example #8
0
    def upsert_table_data(self, table_name: str, data_keys: List, **kwargs):
        """Insert or update a row keyed on the epidemiology composite key.

        Only columns listed in `data_keys` are rewritten on conflict.
        """
        self.check_if_gid_exists(kwargs)

        columns = list(kwargs.keys())
        # "col" = %(col)s assignments for the updatable columns only.
        conflict_updates = [
            sql.Composed(
                [sql.Identifier(col), sql.SQL("="), sql.Placeholder(col)])
            for col in columns if col in data_keys
        ]
        sql_query = sql.SQL(
            """INSERT INTO {table_name} ({insert_keys}) VALUES ({insert_data})
                                ON CONFLICT
                                    (date, country, countrycode, COALESCE(adm_area_1, ''), COALESCE(adm_area_2, ''),
                                     COALESCE(adm_area_3, ''), source)
                                DO
                                    UPDATE SET {update_data}
                                RETURNING *""").format(
                table_name=sql.Identifier(table_name),
                insert_keys=sql.SQL(",").join(
                    sql.Identifier(c) for c in columns),
                insert_data=sql.SQL(",").join(
                    sql.Placeholder(c) for c in columns),
                update_data=sql.SQL(",").join(conflict_updates))

        self.execute(sql_query, kwargs)
        logger.debug("Updating {} table with data: {}".format(
            table_name, list(kwargs.values())))
Example #9
0
    def run(self,
            cursor: psycopg2.extensions.cursor,
            limit: Optional[int] = None) -> List[Result]:
        """Runs the query.

        Args:
            cursor: psycopg2 cursor.
            limit: Integer maximum number of matches. If None (the default), no
                limit is set.

        Returns:
            List of Result instances.
        """
        base_query = sql.SQL("""
            SELECT DISTINCT dataset_id, reaction_id, serialized
            FROM reactions
            WHERE doi = ANY (%s)""")
        components = [base_query]
        args = [self._dois]
        if limit:
            components.append(sql.SQL(" LIMIT %s"))
            args.append(limit)
        query = sql.Composed(components).join("")
        # Log the fully-interpolated command for debugging.
        logger.info("Running SQL command:%s",
                    cursor.mogrify(query, args).decode())
        cursor.execute(query, args)
        return fetch_results(cursor)
Example #10
0
 def _get_where_sql(self, entity: DataEntity):
     """Build `"column" = literal` conditions for the entity's key fields,
     joined with AND."""
     fields_map = self.get_fields_map(entity)
     conditions = (
         sql.Composed([
             sql.Identifier(fields_map[key]),
             sql.SQL(' = '),
             sql.Literal(entity[key])
         ])
         for key in entity.get_key_fields())
     return sql.SQL(' AND ').join(conditions)
Example #11
0
    def upsert_diagnostics(self, **kwargs):
        """Insert or update a diagnostics row keyed on (table_name, source)."""
        # Columns that may be rewritten when the row already exists.
        data_keys = [
            "validation_success", "error", "last_run_start", "last_run_stop",
            "first_timestamp", "last_timestamp", "details"
        ]
        columns = list(kwargs.keys())
        conflict_updates = [
            sql.Composed(
                [sql.Identifier(col), sql.SQL("="), sql.Placeholder(col)])
            for col in columns if col in data_keys
        ]
        sql_query = sql.SQL(
            """INSERT INTO diagnostics ({insert_keys}) VALUES ({insert_data})
                                        ON CONFLICT
                                            (table_name, source)
                                        DO
                                            UPDATE SET {update_data}
                                        RETURNING *""").format(
                insert_keys=sql.SQL(",").join(
                    sql.Identifier(c) for c in columns),
                insert_data=sql.SQL(",").join(
                    sql.Placeholder(c) for c in columns),
                update_data=sql.SQL(",").join(conflict_updates))

        self.execute(sql_query, kwargs)
        logger.debug("Updating diagnostics table with data: {}".format(
            list(kwargs.values())))
Example #12
0
def update_data(cursor, uuid, data):
    """Update the post and/or user record behind the post with this uuid.

    Args:
        cursor: open DB cursor.
        uuid: public post identifier; must exist in `posts`.
        data: dict of external field names; only recognized post/user keys
            are applied, everything else is ignored.

    Raises:
        RuntimeError: if no post with the given uuid exists.
    """
    cursor.execute('SELECT user_id FROM posts WHERE uuid = %s;', [uuid])
    if cursor.fetchone() is None:
        raise RuntimeError('No such post.')

    # External field names accepted for each table.
    post_keys = [
        'url',
        'post_date',
        'comments_number',
        'votes_number',
        'post_category',
    ]
    user_keys = [
        'username', 'user_karma', 'user_cakeday', 'post_karma', 'comment_karma'
    ]
    # translate_keys presumably maps external names to DB column names
    # (to='db') — TODO confirm against its definition.
    post_data = translate_keys(
        to='db', data={k: data[k]
                       for k in data.keys() if k in post_keys})
    user_data = translate_keys(
        to='db', data={k: data[k]
                       for k in data.keys() if k in user_keys})

    if post_data:
        # Build "col" = %(col)s assignments; posts are addressed by uuid,
        # passed through the extra 'id' placeholder added below.
        sql_query = sql.SQL(
            'UPDATE posts SET {data} WHERE uuid = {id}').format(
                data=sql.SQL(', ').join(
                    sql.Composed([
                        sql.Identifier(k),
                        sql.SQL(" = "),
                        sql.Placeholder(k)
                    ]) for k in post_data.keys()),
                id=sql.Placeholder('id'))
        post_data.update(id=uuid)
        cursor.execute(sql_query, post_data)

    if user_data:
        # Resolve the owning user before touching the users table.
        cursor.execute('SELECT user_id FROM posts WHERE uuid = %s;', [uuid])
        user_id = cursor.fetchone()[0]
        sql_query = sql.SQL('UPDATE users SET {data} WHERE id = {id}').format(
            data=sql.SQL(', ').join(
                sql.Composed(
                    [sql.Identifier(k),
                     sql.SQL(" = "),
                     sql.Placeholder(k)]) for k in user_data.keys()),
            id=sql.Placeholder('id'))
        user_data.update(id=user_id)
        cursor.execute(sql_query, user_data)
Example #13
0
def test_as_string_1(conn):
    """Composed.as_string renders SQL verbatim and quotes identifiers.

    Mirrors the doctests in psycopg2/lib/sql.py.
    """
    composed = sql.Composed([sql.SQL("insert into "), sql.Identifier("table")])
    assert composed.as_string(conn) == 'insert into "table"'
Example #14
0
 def test_iter(self):
     """Iterating a Composed yields its parts in order, then stops."""
     it = iter(sql.Composed([sql.SQL("foo"), sql.SQL('bar')]))
     self.assertEqual(next(it), sql.SQL('foo'))
     self.assertEqual(next(it), sql.SQL('bar'))
     self.assertRaises(StopIteration, it.__next__)
def componer_cadena(campos):
    """Build a `"field" = %(field)s` list joined with " , ", suitable for an
    UPDATE SET clause."""
    partes = [
        sql.SQL(" = ").join(
            sql.Composed([sql.Identifier(campo), sql.Placeholder(campo)]))
        for campo in campos
    ]
    return sql.SQL(" , ").join(partes)
Example #16
0
 def test_iter(self):
     """A Composed object iterates over its component parts in order."""
     parts = [sql.SQL("foo"), sql.SQL("bar")]
     it = iter(sql.Composed(parts))
     for expected in parts:
         self.assertEqual(next(it), expected)
     self.assertRaises(StopIteration, next, it)
Example #17
0
    def generate_match_linkset_finish_sql(self):
        """Build the SQL that materializes linksets.{linkset} from the
        `linkset` view, attaching the similarity score, primary key,
        validation columns and indexes."""
        # Lateral joins exposing the per-method similarity fields.
        sim_fields_sqls = MatchingMethod.get_similarity_fields_sqls(
            self._linkset.matching_methods)

        # Per matching method: its configured similarity-threshold condition.
        sim_matching_methods_conditions_sqls = [
            match_method.similarity_threshold_sql
            for match_method in self._linkset.matching_methods
            if match_method.similarity_threshold_sql
        ]

        # Thresholds applied to the combined (logic-ops) similarity values.
        sim_grouping_conditions_sqls = [
            sql.SQL('{similarity} >= {threshold}').format(
                similarity=similarity, threshold=sql.Literal(threshold))
            for (threshold, similarity
                 ) in self._linkset.similarity_logic_ops_sql_per_threshold
        ]

        # WHERE clause ANDing all conditions; empty SQL when there are none.
        sim_condition_sql = get_sql_empty(
            sql.Composed([
                sql.SQL('WHERE '),
                sql.SQL(' AND ').join(sim_matching_methods_conditions_sqls +
                                      sim_grouping_conditions_sqls)
            ]),
            flag=sim_matching_methods_conditions_sqls
            or sim_grouping_conditions_sqls)

        return sql.SQL(
            cleandoc(""" DROP TABLE IF EXISTS linksets.{linkset} CASCADE;
                CREATE TABLE linksets.{linkset} AS
                SELECT linkset.*, similarity
                FROM linkset
                {sim_fields_sql}
                CROSS JOIN LATERAL coalesce({sim_logic_ops_sql}, 1) AS similarity 
                {sim_condition_sql};
                
                ALTER TABLE linksets.{linkset}
                ADD PRIMARY KEY (source_uri, target_uri),
                ADD COLUMN cluster_id integer,
                ADD COLUMN cluster_hash_id char(15),
                ADD COLUMN valid link_validity DEFAULT 'unchecked' NOT NULL,
                ADD COLUMN motivation text;

                ALTER TABLE linksets.{linkset} ADD COLUMN sort_order serial;

                CREATE INDEX ON linksets.{linkset} USING hash (source_uri);
                CREATE INDEX ON linksets.{linkset} USING hash (target_uri);
                CREATE INDEX ON linksets.{linkset} USING hash (valid);

                CREATE INDEX ON linksets.{linkset} USING btree (cluster_id);
                CREATE INDEX ON linksets.{linkset} USING btree (similarity);
                CREATE INDEX ON linksets.{linkset} USING btree (sort_order);

                ANALYZE linksets.{linkset};
            """) + '\n').format(
                linkset=sql.Identifier(self._job.table_name(self._linkset.id)),
                sim_fields_sql=sql.SQL('\n').join(sim_fields_sqls),
                sim_logic_ops_sql=self._linkset.similarity_logic_ops_sql,
                sim_condition_sql=sim_condition_sql)
Example #18
0
    def test_join(self):
        """SQL.join composes from lists, Composed objects and empty input."""
        parts = [sql.Identifier('foo'), sql.SQL('bar'), sql.Literal(42)]

        joined = sql.SQL(", ").join(parts)
        self.assertTrue(isinstance(joined, sql.Composed))
        self.assertEqual(joined.as_string(self.conn), '"foo", bar, 42')

        joined = sql.SQL(", ").join(sql.Composed(parts))
        self.assertTrue(isinstance(joined, sql.Composed))
        self.assertEqual(joined.as_string(self.conn), '"foo", bar, 42')

        self.assertEqual(sql.SQL(", ").join([]), sql.Composed([]))
Example #19
0
    def select_all(self, table, columns, where = None):
        """SELECT raw-SQL `columns`, optionally filtered by ANDed `where`
        conditions, and fetch every row."""
        column_list = sql.SQL(', ').join(map(sql.SQL, columns))
        q = sql.SQL("SELECT {} FROM {}").format(
            column_list, self.__table_name__(table))
        if where:
            q = sql.Composed([q, sql.SQL(" WHERE {}").format(
                sql.SQL(' AND ').join(map(sql.SQL, where)))])

        return self.exec_and_fetch_all(q)
Example #20
0
def build_coordinate_filters(conn, dna_or_aa, coordinate_mode, coordinate_ranges):
    """Build the SNV column list, WHERE filter and table name for a
    coordinate-restricted SNV query.

    Args:
        conn: psycopg2 connection (used only to render the filter for the
            emptiness check).
        dna_or_aa: constants["DNA_OR_AA"] mode selector.
        coordinate_mode: constants["COORDINATE_MODES"] selector (AA mode only).
        coordinate_ranges: sequence of (start, end) position pairs.

    Returns:
        Tuple (snv_cols, snv_filter, snv_table, pos_filter_injections).

    Raises:
        ValueError: if the mode arguments do not select an SNV table
            (previously this surfaced later as a NameError on snv_table).
    """
    snv_cols = ["snp_str", "snv_name", "color", "pos", "ref", "alt"]
    snv_filter = []
    if dna_or_aa == constants["DNA_OR_AA"]["DNA"]:
        snv_table = "dna_snp"
    elif dna_or_aa == constants["DNA_OR_AA"]["AA"]:
        snv_cols.append("nt_pos")
        if coordinate_mode == constants["COORDINATE_MODES"]["COORD_GENE"]:
            snv_table = "gene_aa_snp"
            snv_cols.append("gene")
            snv_filter.append(
                sql.SQL('snp_data."gene" = {gene}').format(
                    gene=sql.Placeholder("selected_gene")
                )
            )
        elif coordinate_mode == constants["COORDINATE_MODES"]["COORD_PROTEIN"]:
            snv_table = "protein_aa_snp"
            snv_cols.append("protein")
            snv_filter.append(
                sql.SQL('snp_data."protein" = {protein}').format(
                    protein=sql.Placeholder("selected_protein")
                )
            )
        else:
            raise ValueError(
                "Unknown coordinate_mode: {}".format(coordinate_mode))
    else:
        raise ValueError("Unknown dna_or_aa mode: {}".format(dna_or_aa))

    snv_filter = sql.SQL(" AND ").join(snv_filter)

    # The position column depends only on the mode, not on the range index,
    # so compute it once instead of per iteration.
    pos_column = "pos" if dna_or_aa == constants["DNA_OR_AA"]["DNA"] else "nt_pos"
    pos_filter = []
    pos_filter_injections = {}
    for i, (range_start, range_end) in enumerate(coordinate_ranges):
        pos_filter.append(
            sql.SQL(
                """
                (snp_data.{pos_column} >= {start} AND snp_data.{pos_column} <= {end})
                """
            ).format(
                pos_column=sql.Identifier(pos_column),
                start=sql.Placeholder("range_{}_start".format(i)),
                end=sql.Placeholder("range_{}_end".format(i)),
            )
        )
        # Build injection map: placeholder names -> concrete range bounds.
        pos_filter_injections["range_{}_start".format(i)] = range_start
        pos_filter_injections["range_{}_end".format(i)] = range_end

    pos_filter = sql.SQL(" OR ").join(pos_filter)

    # Compose final WHERE expression
    snv_filter = [snv_filter]
    # Only combine the snv_filter and pos_filter if the snv_filter exists
    if snv_filter[0].as_string(conn):
        snv_filter.append(sql.SQL(" AND "))

    snv_filter.append(pos_filter)
    snv_filter = sql.Composed(snv_filter)

    return snv_cols, snv_filter, snv_table, pos_filter_injections
Example #21
0
def get_sql_empty(sql_insert,
                  flag=True,
                  prefix=None,
                  suffix=None,
                  add_new_line=True):
    """Return sql_insert wrapped with optional prefix/suffix/newline, or an
    empty SQL fragment when it is disabled or itself empty."""
    is_empty = (not flag or not sql_insert
                or sql_insert == sql.SQL('')
                or sql_insert == sql.Composed([]))
    if is_empty:
        return sql.SQL('')

    parts = [sql_insert]
    if prefix:
        parts.insert(0, prefix)
    if suffix:
        parts.append(suffix)
    if add_new_line:
        # The newline always ends up first, before any prefix.
        parts.insert(0, sql.SQL('\n'))

    # If nothing was added, hand back the original object unchanged.
    return sql.Composed(parts) if len(parts) > 1 else sql_insert
Example #22
0
    def _groupby(self, clauses=()):
        """Combine group by clauses.

        Arguments:
            clauses: `("mt.field", ...)`
        """
        # "et.id" is always grouped on, after any caller-supplied clauses.
        all_clauses = clauses + ("et.id", )
        return sql.Composed([sql.SQL(c) for c in all_clauses]).join(", ")
Example #23
0
 def test_delete(self):
     """delete() issues a parameterized DELETE addressed by id."""
     cursor_wrapper, cursor_mock = self.mock_cursor()
     sql.delete(cursor_wrapper, 'users', '1234')
     expected = psql.Composed([
         psql.SQL('delete from '),
         psql.Identifier('users'),
         psql.SQL(' where id = %s')
     ])
     cursor_mock.execute.assert_called_with(expected, ('1234', ))
Example #24
0
    def generate_match_linkset_sql(self):
        """Build the SQL that creates the `linkset` materialized view by
        joining source and target on the configured matching conditions."""
        # Key/value argument pairs for the jsonb_build_object calls below.
        similarities_sqls, source_intermediates_sqls, target_intermediates_sqls = [], [], []
        for matching_method in self._linkset.matching_methods:
            field_name = matching_method.field_name
            if matching_method.similarity_sql:
                similarities_sqls.append(
                    sql.SQL('{}, {}').format(sql.Literal(field_name),
                                             matching_method.similarity_sql))

            if matching_method.is_intermediate:
                source_intermediates_sqls \
                    .append(sql.SQL('{}, array_agg(DISTINCT source.{})')
                            .format(sql.Literal(field_name), sql.Identifier(field_name + '_intermediate')))
                target_intermediates_sqls \
                    .append(sql.SQL('{}, array_agg(DISTINCT target.{})')
                            .format(sql.Literal(field_name), sql.Identifier(field_name + '_intermediate')))

        # Fold the matching-method tree into one nested boolean expression,
        # joining each node's children with its operator (AND/OR).
        conditions_sql = self._linkset.with_matching_methods_recursive(
            lambda condition: sql.SQL('({})').format(
                sql.SQL('\n%s ' % condition['operator']).join(condition[
                    'children'])), lambda mm: mm['matching_method'].sql)

        if self._linkset.use_counter:
            # Side effect per evaluated pair: bump the progress counter.
            conditions_sql = sql.Composed([
                conditions_sql,
                sql.SQL("\nAND increment_counter('linkset_count')")
            ])

        return sql.SQL(cleandoc(
            """ DROP MATERIALIZED VIEW IF EXISTS linkset CASCADE;
                CREATE MATERIALIZED VIEW linkset AS 
                SELECT CASE WHEN source.uri < target.uri THEN source.uri ELSE target.uri END AS source_uri,
                       CASE WHEN source.uri < target.uri THEN target.uri ELSE source.uri END AS target_uri,
                       CASE WHEN every(source.uri < target.uri) THEN 'source_target'::link_order
                            WHEN every(target.uri < source.uri) THEN 'target_source'::link_order
                            ELSE 'both'::link_order END AS link_order,
                       array_agg(DISTINCT source.collection) AS source_collections,
                       array_agg(DISTINCT target.collection) AS target_collections,
                       {source_intermediates} AS source_intermediates,
                       {target_intermediates} AS target_intermediates,
                       {similarities} AS similarities
                FROM source
                JOIN target ON source.uri != target.uri
                AND {conditions}
                GROUP BY source_uri, target_uri;
            """
        ) + '\n').format(
            linkset=sql.Identifier(self._job.table_name(self._linkset.id)),
            source_intermediates=sql.SQL('jsonb_build_object({})').format(sql.SQL(', ').join(source_intermediates_sqls)) \
                if source_intermediates_sqls else sql.SQL('NULL::jsonb'),
            target_intermediates=sql.SQL('jsonb_build_object({})').format(sql.SQL(', ').join(target_intermediates_sqls)) \
                if target_intermediates_sqls else sql.SQL('NULL::jsonb'),
            similarities=sql.SQL('jsonb_build_object({})').format(sql.SQL(', ').join(similarities_sqls)) \
                if similarities_sqls else sql.SQL('NULL::jsonb'),
            conditions=conditions_sql
        )
Example #25
0
 def test_remove_keys_nested(self):
     """remove_keys() turns dotted keys into a jsonb #- path delete."""
     cursor_wrapper, cursor_mock = self.mock_cursor()
     sql.remove_keys(cursor_wrapper, 'users', 'foo-id', ['nested.key'])
     expected = psql.Composed([
         psql.SQL('update '),
         psql.Identifier('users'),
         psql.SQL(' set jdoc=(jdoc #- %s) where id = %s')
     ])
     cursor_mock.execute.assert_called_with(expected,
                                            ('{nested,key}', 'foo-id'))
Example #26
0
    def generate_schema_sql(self):
        """Create the job's schema and put it on the search path."""
        schema = sql.Identifier(self._job.schema_name(self._linkset.id))
        create_sql = sql.SQL('CREATE SCHEMA IF NOT EXISTS {};\n').format(
            schema)
        search_path_sql = sql.SQL(
            'SET search_path TO "$user", {}, public;\n').format(schema)
        return sql.Composed([create_sql, search_path_sql])
Example #27
0
def get_etf_words(elim_dct=None):
    """Count word occurrences across all ETF names.

    Args:
        elim_dct: optional mapping of stop-words to exclude from the count.

    Returns:
        Dict mapping each remaining word to its number of occurrences.
    """
    # A mutable default argument ({}) would be shared across calls; use a
    # None sentinel instead.
    if elim_dct is None:
        elim_dct = {}
    q = sql.Composed([sql.SQL("SELECT name FROM etfs")])
    res = stxdb.db_read_cmd(q.as_string(stxdb.db_get_cnx()))
    words = ' '.join(row[0] for row in res).split()
    counts = {}
    for w in words:
        if w not in elim_dct:
            counts[w] = counts.get(w, 0) + 1
    return counts
Example #28
0
    def generate_lens_sql(self):
        """Build the SQL that materializes lenses.{lens} by recursively
        combining the underlying linkset/lens selections and attaching the
        similarity score."""
        def spec_select_sql(id, type):
            # Leaf SELECT over a single linkset or lens table.
            default_columns = sql.SQL('source_uri, target_uri, link_order, source_collections, target_collections, '
                                      'source_intermediates, target_intermediates, similarities, valid')

            if type == 'linkset':
                return sql.SQL('SELECT {default_columns}, ARRAY[{id}] AS linksets, ARRAY[]::integer[] AS lenses '
                               'FROM linksets.{table}').format(
                    default_columns=default_columns,
                    id=sql.Literal(id),
                    table=sql.Identifier(self._job.table_name(id))
                )

            return sql.SQL('SELECT {default_columns}, linksets, ARRAY[{id}] AS lenses '
                           'FROM lenses.{table}').format(
                default_columns=default_columns,
                id=sql.Literal(id),
                table=sql.Identifier(self._job.table_name(id))
            )

        # Recursively combine parenthesized left/right sub-selections
        # according to each lens operation.
        lens_sql = self._lens.with_lenses_recursive(
            lambda elem: self._lens_sql(elem['type'], elem['only_left'],
                                        sql.SQL('(\n{sql}\n)').format(sql=elem['left']),
                                        sql.SQL('(\n{sql}\n)').format(sql=elem['right'])),
            lambda spec: spec_select_sql(spec['id'], spec['type'])
        )

        # Similarity fields and threshold filtering, as in linkset generation.
        sim_fields_sqls = MatchingMethod.get_similarity_fields_sqls(self._lens.matching_methods)

        sim_conditions_sqls = [sql.SQL('{similarity} >= {threshold}')
                                   .format(similarity=similarity, threshold=sql.Literal(threshold))
                               for (threshold, similarity) in self._lens.similarity_logic_ops_sql_per_threshold]

        # WHERE clause ANDing the conditions; empty SQL when there are none.
        sim_condition_sql = get_sql_empty(sql.Composed([sql.SQL('WHERE '), sql.SQL(' AND ').join(sim_conditions_sqls)]),
                                          flag=sim_conditions_sqls)

        return sql.SQL(cleandoc(
            """ DROP TABLE IF EXISTS lenses.{lens} CASCADE;
                CREATE TABLE lenses.{lens} AS
                SELECT lens.*, similarity
                FROM (
                    {lens_sql}
                ) AS lens
                {sim_fields_sql}
                CROSS JOIN LATERAL coalesce({sim_logic_ops_sql}, 1) AS similarity 
                {sim_condition_sql};
            """
        ) + '\n').format(
            lens=sql.Identifier(self._job.table_name(self._lens.id)),
            lens_sql=lens_sql,
            sim_fields_sql=sql.SQL('\n').join(sim_fields_sqls),
            sim_logic_ops_sql=self._lens.similarity_logic_ops_sql,
            sim_condition_sql=sim_condition_sql
        )
Example #29
0
    def delete(self, table, where, returning = None):
        """DELETE rows matching ANDed raw-SQL `where` conditions, optionally
        RETURNING one column."""
        conditions = sql.SQL(' AND ').join(sql.SQL(w) for w in where)
        q = sql.SQL("DELETE FROM {} WHERE {}").format(
            self.__table_name__(table), conditions)

        if not returning:
            self.execute(q)
            return
        q = sql.Composed([q, sql.SQL(" RETURNING {}").format(
            sql.Identifier(returning))])
        return self.exec_and_fetch(q)
Example #30
0
 def insert(self, table, columns, values, returning = None):
     """INSERT one row of `values` into `columns`, optionally RETURNING a
     column."""
     q = sql.SQL("INSERT INTO {} ({}) VALUES ({})").format(
         self.__table_name__(table),
         sql.SQL(', ').join(sql.Identifier(c) for c in columns),
         # One positional placeholder per column.
         sql.SQL(', ').join(sql.Placeholder() * len(columns)))
     if returning:
         q = sql.Composed([q, sql.SQL(" RETURNING {}").format(
             sql.Identifier(returning))])
         return self.exec_and_fetch(q, values)
     self.execute(q, values)