def get_location_id(dbcursor: sqlite3.Cursor, keyword: str, prompt: str = None):
    """Gets a location lcode from the user."""
    # get exact match lcode
    dbcursor.execute("SELECT * "
                     "FROM locations WHERE lcode LIKE ?", (keyword, ))
    location = dbcursor.fetchall()
    if location:
        return location[0][0]

    # get matching locations, since it's not an lcode
    kw = '%' + keyword + '%'
    dbcursor.execute(
        "SELECT * "
        "FROM locations "
        "WHERE city LIKE ? OR prov LIKE ? OR address LIKE ?",
        (kw, kw, kw))
    locations = dbcursor.fetchall()

    # display and get user selection if more than one
    if len(locations) > 1:
        if prompt:
            print(prompt)
        return get_selection(locations)[0]
    elif locations:
        return locations[0][0]
    else:
        raise ValueNotFoundException("No location for " + keyword)
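
# A minimal usage sketch for get_location_id, for illustration only: it assumes a
# locations(lcode, city, prov, address) table already exists, and that get_selection
# and ValueNotFoundException are defined elsewhere in this module. The keyword
# "Edmonton" and the database path are made up.
def example_lookup_location(db_path: str) -> None:
    conn = sqlite3.connect(db_path)
    try:
        # An exact lcode returns immediately; any other keyword falls back to a
        # fuzzy search over city, province and address.
        lcode = get_location_id(conn.cursor(), "Edmonton", prompt="Select a location:")
        print("Resolved location:", lcode)
    except ValueNotFoundException as exc:
        print(exc)
    finally:
        conn.close()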
def find_all_related_transformation(cursor: sqlite3.Cursor, transIDs: List[str]):
    """A recursive function that finds all related transformations given a list of
    transformation IDs in the database.

    Args:
        cursor: sqlite3.Cursor. Cursor of the current sqlite3 database connection.
        transIDs: List[str]. A list of transformation IDs.
    """
    transQueryStr = "(" + ', '.join(transIDs) + ')'
    cursor.execute(f"""
        SELECT node_name FROM Log_Transformation
        WHERE trans_id in {transQueryStr} and operation_type in ('ADD_OPERAND', 'REMOVE_OPERAND')
        GROUP BY node_name
    """)
    rows = cursor.fetchall()
    nodesList = ["'" + r[0] + "'" for r in rows]

    transQueryStr = "(" + ', '.join(nodesList) + ')'
    cursor.execute(f"""
        SELECT trans_id FROM Log_Transformation
        WHERE node_name in {transQueryStr} and operation_type in ('ADD_OPERAND', 'REMOVE_OPERAND')
        GROUP BY trans_id
    """)
    rows = cursor.fetchall()
    newTransIDs = [str(r[0]) for r in rows]

    if sorted(newTransIDs) != sorted(transIDs):
        transIDs = find_all_related_transformation(cursor, newTransIDs)
    return transIDs
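
# Hypothetical usage sketch: starting from a single seed transformation ID, expand to
# the full connected set of transformations. The recursion stops once the ID set no
# longer grows. Assumes a Log_Transformation table shaped as queried above; the seed
# value is arbitrary.
def example_related_transformations(cursor: sqlite3.Cursor) -> List[str]:
    seed_ids = ["42"]  # IDs are passed as strings because they are spliced into the SQL
    return find_all_related_transformation(cursor, seed_ids)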
def partition_geocode(con: sqlite3.Connection, cur: sqlite3.Cursor, quarter: str, county_cht: str):
    """Geocode addresses of the given county, one quarter's tables at a time."""
    cur.execute('''SELECT 土地區段位置或建物區門牌 FROM "{0}/TRX"
                   WHERE 縣市 = ? GROUP BY 土地區段位置或建物區門牌;'''.format(quarter),
                (county_cht,))
    for address, in cur.fetchall():
        cur.execute('''SELECT GEO.編號 FROM "{0}/TRX" AS TRX, "{0}/GEO" AS GEO
                       WHERE TRX.編號 = GEO.編號
                             AND TRX.土地區段位置或建物區門牌 = ?
                             AND GEO.LAT_Avg ISNULL;'''.format(quarter), (address,))
        identities = cur.fetchall()
        if not identities:
            continue
        print("[%d] " % (len(identities)) + address)
        try:
            results = selective_geocode(address)
        except geo.AddressError:
            continue
        if len(results["lat"]) != 5 or len(results["lon"]) != 5:
            continue
        results["lat"].append(sum(results["lat"]) / len(results["lat"]))
        results["lon"].append(sum(results["lon"]) / len(results["lon"]))
        combined = [num for zipped in zip(results["lat"], results["lon"]) for num in zipped]
        values = [(tuple(combined) + identity) for identity in identities]
        cur.executemany('''UPDATE "{0}/GEO"
                           SET LAT_1 = ?, LON_1 = ?, LAT_2 = ?, LON_2 = ?, LAT_3 = ?, LON_3 = ?,
                               LAT_4 = ?, LON_4 = ?, LAT_5 = ?, LON_5 = ?, LAT_Avg = ?, LON_Avg = ?
                           WHERE 編號 = ?;'''.format(quarter), values)
        con.commit()
async def import_roles(conn: asyncpg.Connection, c: sqlite3.Cursor):
    log.info("Importing roles...")
    auto_roles = []
    joinable_roles = []
    log.debug("Gathering auto roles from sqlite...")
    c.execute("SELECT server_id, role_id, guild FROM auto_roles")
    rows = c.fetchall()
    for server_id, role_id, guild in rows:
        auto_roles.append((server_id, role_id, guild))
    log.debug(f"Collected {len(auto_roles):,} records from old database.")
    log.info("Copying records to auto roles table")
    res = await conn.copy_records_to_table("role_auto", records=auto_roles,
                                           columns=["server_id", "role_id", "rule"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")

    log.debug("Gathering joinable roles from sqlite...")
    c.execute("SELECT server_id, role_id FROM joinable_roles")
    rows = c.fetchall()
    for server_id, role_id in rows:
        joinable_roles.append((server_id, role_id))
    log.debug(f"Collected {len(joinable_roles):,} records from old database.")
    log.info("Copying records to joinable roles table")
    res = await conn.copy_records_to_table("role_joinable", records=joinable_roles,
                                           columns=["server_id", "role_id"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Finished importing roles.")
async def import_characters(conn: asyncpg.Connection, c: sqlite3.Cursor, new_ids: Dict[int, int]):
    log.info("Importing characters...")
    # Dictionary that maps character names to their SQL ID
    old_ids = {}
    chars = []
    log.debug("Gathering character records from sqlite...")
    c.execute("""SELECT id, user_id, name, level, vocation, world, guild FROM chars ORDER BY id ASC""")
    rows = c.fetchall()
    for char_id, user_id, name, level, vocation, world, guild in rows:
        chars.append((user_id, name, level, vocation, world, guild))
        old_ids[name] = char_id
    log.debug(f"Collected {len(chars):,} records from old database.")
    log.info("Copying records to character table")
    res = await conn.copy_records_to_table("character", records=chars,
                                           columns=["user_id", "name", "level", "vocation",
                                                    "world", "guild"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    new_chars = await conn.fetch('SELECT id, name FROM "character"')
    log.debug("Generating old id to new id mapping...")
    new_ids.clear()
    for new_id, name in new_chars:
        new_ids[old_ids[name]] = new_id
    log.debug("Old id to new id mapping generated.")

    ids = 1
    deaths = []
    killers = []
    log.debug("Gathering death records from sqlite...")
    c.execute("""SELECT char_id, level, killer, date, byplayer FROM char_deaths ORDER BY date ASC""")
    rows = c.fetchall()
    # This doesn't seem very safe to do, maybe it would be better to import deaths the old way
    for char_id, level, killer, date, byplayer in rows:
        byplayer = byplayer == 1
        date = datetime.datetime.fromtimestamp(date, datetime.timezone.utc)
        deaths.append((new_ids[char_id], level, date))
        killers.append((ids, killer, byplayer))
        ids += 1
    log.debug(f"Collected {len(deaths):,} records from old database.")
    log.info("Copying records to deaths table.")
    res = await conn.copy_records_to_table("character_death", records=deaths,
                                           columns=["character_id", "level", "date"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Copying records to death killers table.")
    res = await conn.copy_records_to_table("character_death_killer", records=killers,
                                           columns=["death_id", "name", "player"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")

    log.debug("Gathering level up records from sqlite...")
    c.execute("""SELECT char_id, level, date FROM char_levelups ORDER BY date ASC""")
    rows = c.fetchall()
    levelups = []
    for char_id, level, date in rows:
        date = datetime.datetime.fromtimestamp(date, datetime.timezone.utc)
        levelups.append((new_ids[char_id], level, date))
    log.debug(f"Collected {len(levelups):,} records from old database.")
    log.info("Copying records to level ups table.")
    res = await conn.copy_records_to_table("character_levelup", records=levelups,
                                           columns=["character_id", "level", "date"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Finished importing characters.")
def get_previous_urls(curs: sqlite3.Cursor) -> Set[str]:
    """Return set of previously downloaded, attempted or old urls."""
    curs.execute('SELECT url FROM articles')
    urls = set(map(itemgetter(0), curs.fetchall()))
    curs.execute('SELECT url FROM bad_articles')
    urls.update(map(itemgetter(0), curs.fetchall()))
    curs.execute('SELECT url FROM old_articles')
    urls.update(map(itemgetter(0), curs.fetchall()))
    return urls
def exec_withdrawal(cursor: sqlite3.Cursor) -> None:
    cursor.execute('select address from withdrawal_req where completed == 0')
    addresses = {r[0] for r in cursor.fetchall()}
    if len(addresses) == 0:
        return

    # Sum the pending request amounts per address.
    params = {}
    for address in addresses:
        cursor.execute(
            'select amount from withdrawal_req where address == ? and completed == 0',
            (address, ))
        req_amounts = [Decimal(str(r[0])) for r in cursor.fetchall()]
        amount = Decimal('0.0')
        for req_amount in req_amounts:
            amount = amount + req_amount
        params[address] = float(amount)

    try:
        txid = coinrpc.call('sendmany', '', params, MINCONF, '', list(addresses))
    except Exception:
        txid = None

    if txid is None:
        # The send failed: refund each pending amount to the account balance and
        # mark the pending requests as failed.
        cursor.execute('select account, value from withdrawal_req where completed == 0')
        for req in cursor.fetchall():
            account = req[0]
            value = Decimal(str(req[1]))
            cursor.execute('select balance from account_wallet where account == ?',
                           (account, ))
            balance = Decimal(str(cursor.fetchone()[0]))
            balance = balance + value
            cursor.execute('update account_wallet set balance = ? where account == ?',
                           (float(balance), account))
        cursor.execute('update withdrawal_req set completed = -1 where completed == 0')
        return

    cursor.execute('update withdrawal_req set completed = 1 where completed == 0')
def s_where_with_type_lite(cur: sqlite3.Cursor, table: str, dtypes: dict, row: dict) -> pd.DataFrame:
    # sqlite3 also needs the correct type to select data; finding the right type takes trial
    # and error, since int and str values sometimes cannot be distinguished explicitly
    sql = 'SELECT * FROM `{}` where {}'.format(
        table, ' and '.join(["{}=?".format(key) for key in row.keys()]))
    print(sql)
    row = as_type_dict(dtypes, row)
    print(row)
    cur.execute(sql, tuple(row.values()))
    data = cur.fetchall()
    cur.execute("SELECT name FROM pragma_table_info('{}')".format(table))
    cols = cur.fetchall()
    return pd.DataFrame(data, columns=[col[0] for col in cols])
def database_table_to_pandas_table(cursor: sqlite3.Cursor, table_name: str) -> pd.DataFrame:
    cursor.execute(f'PRAGMA table_info({table_name});')
    column_names = [x[1] for x in cursor.fetchall()]
    cursor.execute(f'SELECT * FROM {table_name};')
    values = cursor.fetchall()
    data = {
        column: [k[ix] for k in values]
        for ix, column in enumerate(column_names)
    }
    return pd.DataFrame(data)
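
# Quick self-contained check of database_table_to_pandas_table against an in-memory
# database. Illustrative only: the table and column names are made up, and sqlite3/pandas
# are assumed to be imported by the surrounding module (as the type hints above imply).
def example_table_to_dataframe() -> pd.DataFrame:
    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE people (name TEXT, age INTEGER)")
    cur.executemany("INSERT INTO people VALUES (?, ?)", [("Ada", 36), ("Alan", 41)])
    df = database_table_to_pandas_table(cur, "people")  # columns: name, age
    conn.close()
    return df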
def hard_code_save_to_db(cursor: sqlite3.Cursor, all_github_jobs: List[Dict[str, Any]],
                         all_stackoverflow_job, all_location):
    # in the insert statement below we need one '?' for each column, then we will use a second
    # param with each of the values when we execute it. SQLITE3 will do the data sanitization
    # to avoid little bobby tables style problems
    insert_statement = """INSERT INTO hardcode_github_jobs(
        id, type, url, created_at, company, company_url, location, title, description,
        how_to_apply, company_logo)
        VALUES(?,?,?,?,?,?,?,?,?,?,?)"""
    for job_info in all_github_jobs:
        # first turn all the values from the jobs dict into a tuple
        data_to_enter = tuple(job_info.values())
        cursor.execute(insert_statement, data_to_enter)

    sql = ''' INSERT INTO stackoverflow_jobs(guid,link,date,title,description)
              VALUES(?,?,?,?,?) '''
    for job_info in all_stackoverflow_job:
        print(job_info)
        if 'created_at' not in job_info:
            job_info['created_at'] = job_info['published']
        data_enter = (job_info['guid'], job_info['link'], job_info['created_at'],
                      job_info['title'], job_info['description'])
        cursor.execute(sql, data_enter)
    sql1 = "SELECT title FROM stackoverflow_jobs;"
    for result in cursor.execute(sql1):
        print(result)

    sql2 = ''' INSERT INTO location(name,latitude,longitude)
               VALUES(?,?,?) '''
    for job_info in all_location:
        print(job_info)
        if 'created_at' not in job_info:
            job_info['created_at'] = job_info['published']
        data_enter1 = (job_info['name'], job_info['latitude'], job_info['longitude'])
        cursor.execute(sql2, data_enter1)
    sql3 = "SELECT name FROM location;"
    for result in cursor.execute(sql3):
        print(result)
async def import_events(conn: asyncpg.Connection, c: sqlite3.Cursor, new_char_ids: Dict[int, int]):
    log.info("Importing events...")
    events = []
    subscribers = []
    participants = []
    new_event_ids = {}
    i = 1
    log.debug("Gathering event records from sqlite...")
    c.execute("SELECT id, creator, name, start, active, status, description, server, joinable, slots"
              " FROM events")
    rows = c.fetchall()
    for event_id, creator, name, start, active, status, description, server, joinable, slots in rows:
        new_event_ids[event_id] = i
        start = datetime.datetime.fromtimestamp(start, datetime.timezone.utc)
        active = bool(active)
        joinable = bool(joinable)
        status = 4 - status
        events.append((creator, name, start, active, description, server, joinable, slots, status))
        i += 1
    log.debug(f"Collected {len(events):,} records from old database.")
    log.info("Copying records to events table")
    res = await conn.copy_records_to_table("event", records=events,
                                           columns=["user_id", "name", "start", "active",
                                                    "description", "server_id", "joinable",
                                                    "slots", "reminder"])
    log.debug(f"Copied {get_affected_count(res):,} records successfully.")

    log.debug("Gathering event subscribers from sqlite...")
    c.execute("SELECT event_id, user_id FROM event_subscribers")
    rows = c.fetchall()
    for event_id, user_id in rows:
        subscribers.append((new_event_ids[event_id], user_id))
    log.debug(f"Collected {len(subscribers):,} records from old database.")
    log.info("Copying records to event subscribers table")
    res = await conn.copy_records_to_table("event_subscriber", records=subscribers,
                                           columns=["event_id", "user_id"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")

    log.debug("Gathering event participants from sqlite...")
    c.execute("SELECT event_id, char_id FROM event_participants")
    rows = c.fetchall()
    for event_id, char_id in rows:
        participants.append((new_event_ids[event_id], new_char_ids[char_id]))
    log.debug(f"Collected {len(participants):,} records from old database.")
    log.info("Copying records to event participants table")
    res = await conn.copy_records_to_table("event_participant", records=participants,
                                           columns=["event_id", "character_id"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Finished importing events.")
def media_check_files(cursor: Cursor, path: str) -> None:
    cursor.execute(QRY_MEDIA, [])
    result = cursor.fetchall()
    count_confirmed = 0
    count_missing = 0
    count_errors = 0
    for row in result:
        id, ftype, size, crc, width, height, ext, title, place = row
        media_filename = 'P{}_{}_{}.{}'.format(id, width, height, ext)
        media_path = os.path.join(path, media_filename)
        print(media_path)
        if os.path.isfile(media_path):
            # check file size
            size = int(size)
            actual_size = os.path.getsize(media_path)
            if actual_size != size:
                count_errors += 1
                print('Wrong size: {} != {} ({})'.format(actual_size, size, media_path))
            # check CRC
            actual_crc = media_crc32_from_file(media_path)
            actual_crc = ~actual_crc & 0xffff_ffff
            crc = int(crc)
            if actual_crc != crc:
                count_errors += 1
                print('Wrong CRC: {} != {} ({})'.format(actual_crc, crc, media_path))
            else:
                count_confirmed += 1
        else:
            count_missing += 1
    print('Confirmed: ' + str(count_confirmed))
    print('Errors: ' + str(count_errors))
    print('Missing: ' + str(count_missing))
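
# Side note on the CRC handling above: Python integers are unbounded, so a bitwise
# complement of a 32-bit CRC is negative until it is masked back into 32 bits, and only
# then can it be compared with the stored value. A minimal illustration (input value
# is arbitrary):
def example_crc_complement(crc32_value: int) -> int:
    return ~crc32_value & 0xFFFF_FFFF  # e.g. ~0 & 0xFFFF_FFFF == 0xFFFFFFFF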
def __sql_fetchall(self, cursor: sqlite3.Cursor) -> tuple:
    """
    This *private* method fetches all the records from the database after the previously
    executed sql statement.

    This method returns a 3-tuple:
        (1) boolean indicating if the sql statement was executed successfully
        (2) string with a message indicating reason for failure
        (3) list of tuples containing the data fetched from the database

    :param cursor: the cursor pointing to the database
    :return: (boolean, string, list of tuples)
    """
    # 6/28/21 - added if statement to return empty list if no records are returned
    success = False
    message = ''
    data = list()
    try:
        # WARNING: Make sure data is not None after the call
        data = cursor.fetchall()
        if not data:
            data = list()
        success = True
    except Error as e:
        data = list()
        success = False
        message = str(e)
    return success, message, data
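
# Hypothetical sibling method, meant to live inside the same class as __sql_fetchall so
# that name mangling resolves; the table name and the self._connection attribute are
# illustrative assumptions, not part of the original code.
def example_fetch_users(self) -> list:
    cursor = self._connection.cursor()
    cursor.execute("SELECT * FROM users")
    success, message, data = self.__sql_fetchall(cursor)
    if not success:
        print(f"fetch failed: {message}")
    return data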
def test_insert_mod_when_calling_update_mod_but_does_not_exist(mod: Mod, sqlite: Sqlite,
                                                               cursor: sqlite3.Cursor):
    sqlite.update_mod(mod)

    expected = [row(mod.id, mod.sites, mod.upload_time, 1)]
    cursor.execute("SELECT * FROM mod")
    assert expected == cursor.fetchall()
def test_skip_insert_mod_when_pretend(mod: Mod, sqlite: Sqlite, cursor: sqlite3.Cursor):
    config.pretend = True
    sqlite.insert_mod(mod)

    cursor.execute("SELECT * FROM mod")
    config.pretend = False
    assert [] == cursor.fetchall()
def _list_tables(self, cursor: Cursor) -> Dict[str, SQLiteTableProxy]:
    cursor.execute(self.SQL_LIST_TABLES)
    tablenames = [n[0] for n in cursor.fetchall()]
    db = {}
    for name in tablenames:
        db[name] = SQLiteTableProxy(self._conn, name)
    return db
def fetch_data(self, cursor: sqlite3.Cursor) -> Any:
    parameters = (self.options.algorithm_name, self.options.algorithm_parameters,
                  self.options.stage, self.options.environment)
    cursor.execute(
        """
        SELECT environment.name, benchmarkRun.runIndex, algorithm.name, algorithm.parameters,
               algorithm.compiler, algorithm.features, benchmark.stage,
               sequentialBenchmark.averageDuration, sequentialBenchmarkIteration.iteration,
               sequentialBenchmarkIteration.duration
        FROM benchmark
        INNER JOIN algorithm ON algorithm.id = benchmark.algorithm
        INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun
        INNER JOIN environment ON environment.id = benchmarkRun.environment
        INNER JOIN sequentialBenchmark ON sequentialBenchmark.benchmark = benchmark.id
        INNER JOIN sequentialBenchmarkIteration
            ON sequentialBenchmarkIteration.sequentialBenchmark = sequentialBenchmark.id
        WHERE algorithm.name = ? AND algorithm.parameters = ?
              AND benchmark.stage = ? AND environment.name = ?
        """, parameters)
    return cursor.fetchall()
def get_by_municipality_part(c: sqlite3.Cursor, query: str) -> List[Dict[str, Any]]:
    """
    Finds localities by the name of a municipality part.

    :param c: database cursor
    :param query: search string
    :return: results :-)
    """
    c.execute(
        """
        SELECT co.nazev as municipality_part_name,
               co.kod as municipality_part_id,
               ob.nazev as municipality_name,
               ob.kod as municipality_code,
               ok.nazev as district_name,
               ok.kod as district_code,
               k.nazev as region_name,
               k.kod as region_id
        FROM casti_obci co
        JOIN obce ob ON (UPPER(SUBSTR(co.nazev, 1, 1)) || LOWER(SUBSTR(co.nazev, 2)) LIKE ?
                         AND ob.kod = co.obec_kod)
        JOIN okresy ok ON ok.kod = ob.okres_kod
        JOIN vusc k ON k.kod = ok.vusc_kod
        ORDER BY k.nazev, ok.nazev, ob.nazev, co.nazev
        LIMIT 11
        """, ("{}%".format(query.capitalize()), ))
    return [dict(x) for x in c.fetchall()]
def format_timeboxed_result(result: Cursor) -> List[dto.UserBotDateResult]:
    """
    Helper function for handling the results of timeboxed queries.

    Currently only supports timeboxing by week (%Y-%W in SQLite). This function will be
    generalized as needed/as the timeboxing query functionality improves to support other
    types of boxes. Currently it has a very specific use case--see its usages in `database.py`.
    """
    # NOTE: I am assuming that the Python %W is equivalent
    # to the SQLite %W (i.e., weeks start on Monday). This is
    # not specified in official SQLite documentation, but is
    # supported here: https://www.techonthenet.com/sqlite/functions/strftime.php

    # Build objects by going in sequence.
    # They will have already been sorted by date.
    res = []
    prev_date: datetime.datetime = None
    for row in result.fetchall():
        # Parse date out of the %Y-%W sqlite format.
        # This requires setting the weekday to "1" (Monday).
        # See https://stackoverflow.com/a/17087427
        curr_date = datetime.datetime.strptime(row[0] + '-1', '%Y-%W-%w')
        if curr_date != prev_date:
            res.append(dto.UserBotDateResult(curr_date))
            prev_date = curr_date
        if row[1] == 'USER':
            res[-1].user = row[2]
        elif row[1] == 'BOT':
            res[-1].bot = row[2]
    return res
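
# Worked example of the %Y-%W-%w trick used above: appending "-1" pins the parse to the
# Monday of the given year/week pair, matching the weekly buckets produced by
# strftime('%Y-%W', ...) under the assumption discussed in the NOTE above. The sample
# week string is arbitrary.
def example_week_bucket_to_date() -> datetime.datetime:
    parsed = datetime.datetime.strptime("2021-07" + "-1", "%Y-%W-%w")
    # parsed == datetime.datetime(2021, 2, 15), the Monday of week 07 of 2021
    return parsed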
def plot_batch_performance(cursor: Cursor, exam: int) -> None:
    '''
    Plot the performance of all students in a particular exam

    Args:
        cursor (Cursor): sqlite3 Cursor object
        exam (int): uid of the exam concerned
    '''
    cursor.execute(f'SELECT student,marks FROM marks WHERE exam={exam}')
    marks_list = cursor.fetchall()
    if not marks_list:
        logger.warning(f'No marks entries exist for exam with uid = {exam}')
        return
    x_axis = [f'{i[0]}' for i in marks_list]
    y_axis = [i[1] for i in marks_list]
    plt.stem(x_axis, y_axis)
    logger.info(f'Plotting stem graph with \nx = {x_axis} \n\ny = {y_axis}')
    plt.xlabel('Students')
    plt.ylabel('Marks')
    plt.show()
def singleColPagingItr(cursor: Cursor, table: str, columnName: str, columnIndex: int,
                       columnEscaped: bool, selectRows: str = "*", pageSize: int = 1000,
                       offset: int = 0):
    """Executes a sqlite SELECT using single-column paging to minimize memory demands.

    :param cursor: db cursor
    :param table: table to query
    :param columnName: paging column name
    :param columnIndex: paging column 0-based index
    :param columnEscaped: flag specifying whether paging column data type requires escaping
    :param selectRows: row names to return
    :param pageSize: limit on the number of records returned in a single page
    :param offset: SQL offset specifying number of rows to skip
    """
    prevVal = ""
    rows = True
    while rows:
        _fmtPrevVal = "\"{}\"".format(prevVal) if columnEscaped else prevVal
        q = SINGLE_COL_PAGED_SELECT_QRY.format(selectRows, table, columnName, _fmtPrevVal,
                                               pageSize, offset)
        cursor.execute(q)
        rows = cursor.fetchall()
        for r in rows:
            yield r
        if rows:
            prevVal = rows[-1][columnIndex]
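
# Illustrative use of singleColPagingItr. The SINGLE_COL_PAGED_SELECT_QRY template is
# defined elsewhere in the original module; the table and column names here are made up.
def example_iterate_large_table(cursor: Cursor) -> int:
    seen = 0
    for record in singleColPagingItr(cursor, table="events", columnName="event_id",
                                     columnIndex=0, columnEscaped=False, pageSize=500):
        seen += 1  # records arrive one page (up to 500 rows) at a time
    return seen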
def fetch_all_inputs(cursor: sqlite3.Cursor) -> List[Dict[str, Any]]: cursor.execute(""" SELECT algorithm.name, algorithm.parameters, microBenchmarkMeasurement.region FROM benchmark INNER JOIN algorithm ON algorithm.id = benchmark.algorithm INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun INNER JOIN environment ON environment.id = benchmarkRun.environment INNER JOIN microBenchmark ON microBenchmark.benchmark = benchmark.id INNER JOIN microBenchmarkMeasurement ON microBenchmarkMeasurement.microBenchmark = microBenchmark.id INNER JOIN microBenchmarkEvent ON microBenchmarkEvent.microBenchmarkMeasurement = microBenchmarkMeasurement.id WHERE microBenchmarkMeasurement.region IN ("crypto_kem_keypair", "crypto_kem_enc", "crypto_kem_dec", "crypto_dh_keypair", "crpyto_dh_enc") AND microBenchmarkEvent.event = "cache-misses" AND microBenchmarkEvent.value >= 0 GROUP BY algorithm.name, algorithm.parameters, microBenchmarkMeasurement.region """) keys = ["algorithm_name", "algorithm_parameters", "region"] rows = cursor.fetchall() inputs = [] for row in rows: inputs.append({keys[i]: value for i, value in enumerate(row)}) return inputs
def fetch_data(self, cursor: sqlite3.Cursor) -> Any:
    parameters = (self.options.algorithm_name, self.options.algorithm_parameters,
                  self.options.environment, self.options.region, self.options.event)
    cursor.execute(
        """
        SELECT algorithm.compiler, algorithm.features, microBenchmarkEvent.value
        FROM benchmark
        INNER JOIN algorithm ON algorithm.id = benchmark.algorithm
        INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun
        INNER JOIN environment ON environment.id = benchmarkRun.environment
        INNER JOIN microBenchmark ON microBenchmark.benchmark = benchmark.id
        INNER JOIN microBenchmarkMeasurement
            ON microBenchmarkMeasurement.microBenchmark = microBenchmark.id
        INNER JOIN microBenchmarkEvent
            ON microBenchmarkEvent.microBenchmarkMeasurement = microBenchmarkMeasurement.id
        WHERE algorithm.name = ? AND algorithm.parameters = ? AND environment.name = ?
              AND microBenchmarkMeasurement.region = ? AND microBenchmarkEvent.event = ?
        """, parameters)
    return cursor.fetchall()
def fetch_data(self, cursor: sqlite3.Cursor) -> Any: parameters = (self.options.algorithm_name, self.options.algorithm_parameters, self.options.stage) cursor.execute( """ SELECT environment.name, algorithm.compiler, algorithm.features, SUM(sequentialBenchmark.iterations), AVG(sequentialBenchmark.averageDuration) FROM benchmark INNER JOIN algorithm ON algorithm.id = benchmark.algorithm INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun INNER JOIN environment ON environment.id = benchmarkRun.environment INNER JOIN sequentialBenchmark ON sequentialBenchmark.benchmark = benchmark.id WHERE algorithm.name = ? AND algorithm.parameters = ? AND benchmark.stage = ? GROUP BY environment.id, algorithm.id """, parameters) return cursor.fetchall()
def get_articles(curs: sqlite3.Cursor) -> Iterator[newspaper.Article]:
    """Yield articles from news sites and ground truth."""
    curs.execute('SELECT url FROM ground_truth')
    previous_urls = set(map(itemgetter(0), curs.fetchall()))
    with open(GROUND_TRUTH_FILE_NAME, 'r') as f:
        reader = csv.reader(f)
        for url, label in reader:
            if url in previous_urls:
                continue
            print(f'Downloading {url}')
            article = newspaper.Article(url)
            article.label = label
            yield article
            previous_urls.add(url)

    news_sites = build_sources(URL_FILE_NAME)
    previous_urls = get_previous_urls(curs)
    for news_site in news_sites:
        news_site.articles[:] = [article for article in news_site.articles
                                 if article.url not in previous_urls]
        num_articles = len(news_site.articles)
        print(f'Downloading {num_articles} articles from {news_site.url}')
        for i in reversed(range(len(news_site.articles))):
            article = news_site.articles[i]
            previous_urls.add(article.url)
            yield article
            del news_site.articles[i]
def fetch_all_inputs(cursor: sqlite3.Cursor) -> List[Dict[str, Any]]: cursor.execute(""" SELECT algorithm.name, algorithm.parameters, benchmark.stage, environment.name FROM benchmark INNER JOIN algorithm ON algorithm.id = benchmark.algorithm INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun INNER JOIN environment ON environment.id = benchmarkRun.environment INNER JOIN sequentialBenchmark ON sequentialBenchmark.benchmark = benchmark.id GROUP BY algorithm.name, algorithm.parameters, benchmark.stage, environment.name """) keys = [ "algorithm_name", "algorithm_parameters", "stage", "environment" ] rows = cursor.fetchall() inputs = [] for row in rows: inputs.append({keys[i]: value for i, value in enumerate(row)}) return inputs
def fetch_data(self, cursor: sqlite3.Cursor) -> Any:
    parameters = (self.options.algorithm_name, self.options.algorithm_parameters,
                  self.options.environment, self.options.event)
    cursor.execute(
        """
        SELECT algorithm.compiler, algorithm.features, benchmark.stage,
               microBenchmarkMeasurement.region, microBenchmarkEvent.value
        FROM algorithm
        INNER JOIN benchmark ON benchmark.algorithm = algorithm.id
        INNER JOIN benchmarkRun ON benchmarkRun.id = benchmark.benchmarkRun
        INNER JOIN environment ON environment.id = benchmarkRun.environment
        INNER JOIN microBenchmark ON microBenchmark.benchmark = benchmark.id
        INNER JOIN microBenchmarkMeasurement
            ON microBenchmarkMeasurement.microBenchmark = microBenchmark.id
        INNER JOIN microBenchmarkEvent
            ON microBenchmarkEvent.microBenchmarkMeasurement = microBenchmarkMeasurement.id
        WHERE algorithm.name = ? AND algorithm.parameters = ? AND environment.name = ?
              AND microBenchmarkEvent.event = ? AND microBenchmarkEvent.value >= 0
              AND microBenchmarkMeasurement.region NOT IN ("crypto_kem_keypair",
                                                           "crypto_kem_enc", "crypto_kem_dec")
        """, parameters)
    return cursor.fetchall()
def aggregate_avg(cursor: sqlite3.Cursor, aggregator_key: str) -> TimingsResultSet:
    cursor.execute(
        'SELECT phase, AVG(duration) AS duration FROM timings '
        'WHERE aggregator_key = ? GROUP BY phase ORDER BY ordinal',
        [aggregator_key])
    return cursor.fetchall()
def _execute_select_query(
    cursor: sqlite3.Cursor,
    bookmark_ids: Optional[List[UUID]],
    tag_denominator: str,
) -> List[Any]:
    query = ("""
        SELECT b.id, b.url, b.title, b.description, t.tags, b.checkedDatetime,
               b.lastVisitDatetime, b.visitCount, b.statusCode
        FROM bookmark AS b
        LEFT JOIN (
            SELECT bookmarkId, GROUP_CONCAT(name, "%s") AS tags
            FROM tag
            GROUP BY bookmarkId
        ) AS t ON b.id = t.bookmarkId
    """ % tag_denominator)
    if bookmark_ids:
        query += "WHERE b.id IN (%s)" % ",".join(["?"] * len(bookmark_ids))
        cursor.execute(query, [str(bid) for bid in bookmark_ids])
    else:
        cursor.execute(query)
    return cursor.fetchall()
def program_exists(self, cursor: sqlite3.Cursor = None) -> bool:
    """
    Checks whether there is data in the database for the program with this file name
    and function type.

    :param cursor: the database cursor
    :return: True if the program exists, otherwise False
    """
    connection = None
    if cursor is None:
        connection = sqlite3.connect(DB_PATH)
        cursor = connection.cursor()
    try:
        cursor.execute("SELECT ID, ProgName, ProgType from map")
        rows = cursor.fetchall()
    except sqlite3.OperationalError:
        if connection is not None:
            connection.close()
        return False
    if connection is not None:
        connection.close()
    names = [row[1] for row in rows]
    types = [row[2] for row in rows]
    try:
        index = names.index(self.file_name)
        if types[index] == self.function_type.lower():
            return True
    except ValueError:
        return False
    return False
async def import_ignored_channels(conn: asyncpg.Connection, c: sqlite3.Cursor):
    log.info("Importing ignored channels...")
    channels = []
    log.debug("Gathering ignored channels from sqlite...")
    c.execute("SELECT server_id, channel_id FROM ignored_channels")
    rows = c.fetchall()
    for server_id, channel_id in rows:
        channels.append((server_id, channel_id))
    log.debug(f"Collected {len(channels):,} records from old database.")
    log.info("Copying records to ignored channels table")
    res = await conn.copy_records_to_table("ignored_entry", records=channels,
                                           columns=["server_id", "entry_id"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Finished importing channels.")
def update_grammar_with_table_values(grammar_dictionary: Dict[str, List[str]],
                                     schema: Dict[str, List[TableColumn]],
                                     cursor: Cursor) -> None:
    for table_name, columns in schema.items():
        for column in columns:
            cursor.execute(f'SELECT DISTINCT {table_name}.{column.name} FROM {table_name}')
            results = [x[0] for x in cursor.fetchall()]
            if column_has_string_type(column):
                productions = sorted([f'"{str(result)}"' for result in results], reverse=True)
                grammar_dictionary["string"].extend(productions)
            elif column_has_numeric_type(column):
                productions = sorted([f'"{str(result)}"' for result in results], reverse=True)
                grammar_dictionary["number"].extend(productions)
async def import_server_properties(conn: asyncpg.Connection, c: sqlite3.Cursor):
    properties = []
    prefixes = []
    times = []
    log.debug("Gathering server property records from sqlite...")
    log.info("Importing server properties...")
    c.execute("SELECT server_id, name, value FROM server_properties")
    rows = c.fetchall()
    for server_id, key, value in rows:
        server_id = int(server_id)
        if key == "prefixes":
            prefixes.append((server_id, json.loads(value)))
            continue
        if key == "times":
            value = json.loads(value)
            for entry in value:
                times.append((server_id, entry["timezone"], entry["name"]))
            continue
        elif key in ["events_channel", "levels_channel", "news_channel", "welcome_channel",
                     "ask_channel", "announce_channel", "announce_level"]:
            value = int(value)
        elif key in ["watched_message", "watched_channel"]:
            continue
        elif key == "commandsonly":
            value = bool(value)
        properties.append((server_id, key, value))
    log.debug(f"Collected {len(properties):,} properties, {len(times):,} timezones and"
              f" {len(prefixes):,} prefixes from old database.")
    log.info("Copying records to server property table")
    res = await conn.copy_records_to_table("server_property", records=properties,
                                           columns=["server_id", "key", "value"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Copying records to server prefixes table")
    res = await conn.copy_records_to_table("server_prefixes", records=prefixes,
                                           columns=["server_id", "prefixes"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Copying records to server timezone table")
    res = await conn.copy_records_to_table("server_timezone", records=times,
                                           columns=["server_id", "zone", "name"])
    log.info(f"Copied {get_affected_count(res):,} records successfully.")
    log.info("Finished importing server properties.")
def clean_up_old_db(c: sqlite3.Cursor):
    log.info("Cleaning up old database")
    # Clean up characters
    c.execute("SELECT min(id), name as id FROM chars GROUP BY name HAVING COUNT(*) > 1")
    rows = c.fetchall()
    log.debug("Removing duplicate characters...")
    for char_id, name in rows:
        c.execute("""UPDATE char_levelups SET char_id = ?
                     WHERE char_id IN
                        (SELECT id FROM chars WHERE name = ? ORDER BY id LIMIT 1)""",
                  (char_id, name))
        c.execute("""UPDATE char_deaths SET char_id = ?
                     WHERE char_id IN
                        (SELECT id FROM chars WHERE name = ? ORDER BY id LIMIT 1)""",
                  (char_id, name))
        c.execute("""UPDATE event_participants SET char_id = ?
                     WHERE char_id IN
                        (SELECT id FROM chars WHERE name = ? ORDER BY id LIMIT 1)""",
                  (char_id, name))
        c.execute("DELETE FROM chars WHERE name = ? AND id != ?", (name, char_id))
    log.info(f"Removed {len(rows):,} duplicate characters")

    # Clean up deaths
    log.debug("Removing duplicate deaths...")
    c.execute("""DELETE FROM char_deaths WHERE rowid NOT IN
                   (SELECT min(rowid) FROM char_deaths GROUP BY char_id, date)""")
    log.info(f"Removed {c.rowcount:,} duplicate deaths")
    log.debug("Removing orphaned deaths...")
    c.execute("""DELETE FROM char_deaths WHERE char_id NOT IN
                   (SELECT id FROM chars)""")
    log.info(f"Removed {c.rowcount:,} orphaned deaths")

    # Clean up level ups
    log.debug("Removing duplicate level ups...")
    c.execute("""SELECT min(rowid), min(date), max(date)-min(date) as diff, count() as c,
                        char_id, level
                 FROM char_levelups
                 GROUP BY char_id, level HAVING c > 1 AND diff < 30""")
    rows = c.fetchall()
    count = 0
    for rowid, date, diff, _count, char_id, level in rows:
        c.execute("""DELETE FROM char_levelups
                     WHERE rowid != ? AND char_id = ? AND level = ?
                           AND date-30 < ? AND date+30 > ?""",
                  (rowid, char_id, level, date, date))
        count += c.rowcount
    log.info(f"Removed {count:,} duplicate level ups")
    log.debug("Removing orphaned level ups...")
    c.execute("""DELETE FROM char_levelups WHERE char_id NOT IN
                   (SELECT id FROM chars)""")
    log.info(f"Removed {c.rowcount:,} orphaned levelups")

    # Clean up event participants
    log.debug("Removing duplicate event participants...")
    c.execute("""DELETE FROM event_participants WHERE rowid NOT IN
                   (SELECT min(rowid) FROM event_participants GROUP BY event_id, char_id)""")
    log.info(f"Removed {c.rowcount:,} duplicate event participants")
    log.debug("Removing orphaned event participants...")
    c.execute("""DELETE FROM event_participants WHERE char_id NOT IN
                   (SELECT id FROM chars)""")
    log.info(f"Removed {c.rowcount:,} orphaned event participants")

    # Clean up event subscribers
    log.debug("Removing duplicate event subscribers...")
    c.execute("""DELETE FROM event_subscribers WHERE rowid NOT IN
                   (SELECT min(rowid) FROM event_subscribers GROUP BY event_id, user_id)""")
    log.info(f"Removed {c.rowcount:,} duplicate event subscribers")

    # Remove server properties
    log.debug("Removing duplicate server properties...")
    c.execute("""DELETE FROM server_properties WHERE rowid NOT IN
                   (SELECT min(rowid) FROM server_properties GROUP BY server_id, name)""")
    log.info(f"Removed {c.rowcount:,} duplicate server properties")