def connection(self):
    """Return the sqlite3 connection, opening and preparing the
    database on first use."""
    if self.dbc:
        return self.dbc
    if self.path != ":memory:":
        self.path = system.check_database_path(self.path, logging.error)
    logging.debug("* Database: %s", self.path)
    self.dbc = sqlite3.connect(self.path)
    #
    # Use sqlite3.Row so columns can be addressed by name, avoiding
    # the need to map columns to schema positions by hand, which is
    # as error prone as driving drunk.
    #
    self.dbc.row_factory = sqlite3.Row
    #
    # Migrate MUST run before table creation.  That is safe because
    # creation always uses the IF NOT EXISTS clause, and it is needed
    # because archived databases (whose version number is old) could
    # not be migrated otherwise.  The config table is the exception:
    # migrate() looks at it, so it must already exist.
    #
    table_config.create(self.dbc)
    migrate.migrate(self.dbc)
    migrate2.migrate(self.dbc)
    for table_module in (table_speedtest, table_geoloc,
                         table_bittorrent, table_log):
        table_module.create(self.dbc)
    return self.dbc
def main(args):
    """Slice a speedtest database into one output database per
    interval taken from the module-level TIMES deque.

    Expects exactly one command-line argument: the path of an input
    database, which must end with the ``.sqlite3`` suffix.
    """
    import time
    arguments = args[1:]
    if len(arguments) != 1:
        sys.stderr.write("Usage: tool_slice.py file\n")
        sys.exit(1)
    # Because below I'm lazy
    if not arguments[0].endswith(".sqlite3"):
        sys.stderr.write("error: Input file must end with .sqlite3 suffix\n")
        sys.exit(1)
    connection = sqlite3.connect(arguments[0])
    connection.row_factory = sqlite3.Row
    since = TIMES.popleft()
    while TIMES:
        until = TIMES.popleft()
        # One output file per slice, named after the slice start month.
        output = sqlite3.connect(arguments[0].replace(".sqlite3",
                                 "_%d-%02d.sqlite3" % (since.year, since.month)))
        table_speedtest.create(output)
        cursor = connection.cursor()
        #
        # BUGFIX: strftime("%s") is a nonstandard GNU libc extension
        # and is undefined on other platforms; compute the (local
        # time) Unix timestamp portably via time.mktime() instead.
        #
        cursor.execute("""SELECT * FROM speedtest WHERE timestamp >= ?
                          AND timestamp < ?;""",
                       (int(time.mktime(since.timetuple())),
                        int(time.mktime(until.timetuple()))))
        # commit=False: committing per row would take an age.
        for row in cursor:
            table_speedtest.insert(output, dict(row), commit=False,
                                   override_timestamp=False)
        output.commit()
        output.close()
        since = until
def connection(self):
    ''' Return connection to database '''
    if self.dbc:
        return self.dbc
    database_xxx.linux_fixup_databasedir()
    if self.path != ":memory:":
        self.path = system.check_database_path(self.path)
    logging.debug("* Database: %s", self.path)
    self.dbc = sqlite3.connect(self.path)
    #
    # Install sqlite3.Row so columns can be addressed by name,
    # avoiding the hand mapping of columns to schema positions,
    # which is as error prone as driving drunk.
    #
    self.dbc.row_factory = sqlite3.Row
    #
    # On POSIX systems, neubot (initially) runs as root, to ensure
    # that database location, ownership and permissions are OK (as
    # well as to bind privileged ports).  But neubot can also be
    # started by normal users.  In that case, mark the database as
    # readonly, since write operations are going to raise
    # exceptions anyway.
    #
    if not system.has_enough_privs():
        logging.warning('database: opening database in readonly mode')
        self.readonly = True
        return self.dbc
    #
    # Migrate MUST run before table creation.  That is safe because
    # creation always uses the IF NOT EXISTS clause, and it is
    # needed because archived databases (whose version number is
    # old) could not be migrated otherwise.  The config table is
    # the exception: migrate() looks at it, so it must exist first.
    #
    table_config.create(self.dbc)
    migrate.migrate(self.dbc)
    migrate2.migrate(self.dbc)
    for table_module in (table_speedtest, table_geoloc,
                         table_bittorrent, table_log, table_raw):
        table_module.create(self.dbc)
    return self.dbc
def main(args):
    """Extract the 'good' subset of a speedtest database into a new
    ``-goodset.sqlite3`` database, then print how many rows of the
    original qualified.

    Rows qualify when download_speed and latency are both below the
    module-level MAX_DOWNLOAD_SPEED and MAX_LATENCY thresholds.
    """
    arguments = args[1:]
    if len(arguments) != 1:
        sys.stderr.write("Usage: tool_goodset.py file\n")
        sys.exit(1)
    # Because I'm lazy below
    if not arguments[0].endswith(".sqlite3"):
        sys.stderr.write("error: Input file must end with .sqlite3\n")
        sys.exit(1)
    outfile = arguments[0].replace(".sqlite3", "-goodset.sqlite3")
    sys.stderr.write("* Output database file: %s\n" % outfile)
    output = sqlite3.connect(outfile)
    table_speedtest.create(output)
    sys.stderr.write("* Processing file: %s... " % arguments[0])
    input_dbm = sqlite3.connect(arguments[0])
    input_dbm.row_factory = sqlite3.Row
    # Get the number of rows in the original database
    cursor = input_dbm.cursor()
    cursor.execute("SELECT COUNT(*) FROM speedtest;")
    # BUGFIX: cursor.next() is Python-2-only; fetchone() is portable.
    total = cursor.fetchone()[0]
    # Copy the goodset to the new database
    cursor = input_dbm.cursor()
    cursor.execute("""SELECT * FROM speedtest WHERE download_speed < ?
                      AND latency < ?;""", (MAX_DOWNLOAD_SPEED, MAX_LATENCY))
    # commit=False per row: a single commit below is much faster.
    for row in cursor:
        table_speedtest.insert(output, dict(row), commit=False,
                               override_timestamp=False)
    sys.stderr.write("done\n")
    sys.stderr.write("* Committing changes to: %s\n" % outfile)
    output.commit()
    # Get the number of rows in the new database
    cursor = output.cursor()
    cursor.execute("SELECT COUNT(*) FROM speedtest;")
    # BUGFIX: same Python-3 portability fix as above.
    goodset = cursor.fetchone()[0]
    if total:
        sys.stdout.write("%d/%d (%.2f%%)\n" % (goodset, total,
                         goodset * 100.0 / total))
    else:
        sys.stdout.write("0/0 (0.00%)\n")
def connection(self):
    """Return the sqlite3 connection, opening and preparing the
    database on first use."""
    if not self.dbc:
        if self.path != ":memory:":
            self.path = system.check_database_path(self.path, logging.error)
        # BUGFIX: pass self.path as a lazy logging argument instead of
        # eagerly formatting with '%' (formatting is skipped when the
        # DEBUG level is disabled).
        logging.debug("* Database: %s", self.path)
        self.dbc = sqlite3.connect(self.path)
        #
        # To avoid the need to map at hand columns in
        # a row with the sql schema, which is as error
        # prone as driving drunk.
        #
        self.dbc.row_factory = sqlite3.Row
        # NOTE(review): here migrate() runs AFTER table creation,
        # unlike the other connection() variants in this file, which
        # migrate right after creating the config table -- confirm
        # whether this ordering is intentional for this revision.
        table_config.create(self.dbc)
        table_speedtest.create(self.dbc)
        table_geoloc.create(self.dbc)
        table_bittorrent.create(self.dbc)
        table_log.create(self.dbc)
        migrate.migrate(self.dbc)
    return self.dbc
def runTest(self):
    """Make sure speedtest table works as expected"""
    connection = sqlite3.connect(":memory:")
    connection.row_factory = sqlite3.Row
    # Creating twice must be harmless (IF NOT EXISTS semantics).
    table_speedtest.create(connection)
    table_speedtest.create(connection)
    # BUGFIX: map(None, iterable) is Python-2-only; list() is the
    # portable equivalent of that identity-map idiom.
    v = list(ResultIterator())
    for d in v:
        table_speedtest.insert(connection, d, override_timestamp=False)
    # Round trip: everything inserted must come back unchanged.
    v1 = table_speedtest.listify(connection)
    # BUGFIX: assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(sorted(v), sorted(v1))
    # A window narrower than the full data set must select fewer rows.
    since = utils.timestamp() - 7 * 24 * 60 * 60
    until = utils.timestamp() - 3 * 24 * 60 * 60
    v2 = table_speedtest.listify(connection, since=since, until=until)
    self.assertTrue(len(v2) < len(v))
    # Pruning old rows must shrink the table.
    table_speedtest.prune(connection, until)
    self.assertTrue(len(table_speedtest.listify(connection)) < len(v1))
def main(args):
    """Build an anonymized copy of a speedtest database honouring the
    per-client privacy permissions, then print sharing statistics.

    Expects exactly one argument: an input database ending with the
    ``.sqlite3`` suffix.  The output is written from scratch, by
    design, to a sibling ``-privacy.sqlite3`` file so that no personal
    data can leak through.
    """
    arguments = args[1:]
    if len(arguments) != 1:
        sys.stderr.write("Usage: tool_privacy.py file\n")
        sys.exit(1)
    # Because I'm lazy below
    if not arguments[0].endswith(".sqlite3"):
        sys.stderr.write("error: Input file name must end with .sqlite3\n")
        sys.exit(1)
    connection = sqlite3.connect(arguments[0])
    connection.row_factory = sqlite3.Row
    #
    # Walk the database once and collect the most recent
    # permission for each unique identifier.  We will then
    # use it to decide whether we can publish or not.
    #
    cursor = connection.cursor()
    cursor.execute("SELECT * FROM speedtest;")
    for row in cursor:
        PERMS[row['uuid']] = (row['privacy_informed'],
                              row['privacy_can_collect'],
                              row['privacy_can_share'])
    #
    # Build another database.  Yes, from scratch.  I don't
    # want leakage of your personal data to be possible, by
    # design.
    #
    output = sqlite3.connect(arguments[0].replace(".sqlite3",
                             "-privacy.sqlite3"))
    table_speedtest.create(output)
    #
    # Walk again the original database and honour the
    # privacy permissions.  We replace your Internet address
    # with all zeros, which is quite a good measure to
    # hide who you are.
    #
    total, can_share = 0, 0
    cursor = connection.cursor()
    cursor.execute("SELECT * FROM speedtest;")
    for row in cursor:
        total = total + 1
        dictionary = dict(row)
        # Honour permissions
        if PERMS[dictionary['uuid']] != (1, 1, 1):
            #
            # TODO Here it would be nice to geolocate the
            # client and resolv the provider to populate a
            # table that gives some information for the
            # clients that have not given permission.
            #
            dictionary['internal_address'] = "0.0.0.0"
            dictionary['real_address'] = "0.0.0.0"
        else:
            can_share = can_share + 1
        # Override permissions
        (dictionary['privacy_informed'],
         dictionary['privacy_can_collect'],
         dictionary['privacy_can_share']) = PERMS[dictionary['uuid']]
        # NOTE commit=False or it will take an Age!
        table_speedtest.insert(output, dictionary, commit=False,
                               override_timestamp=False)
    #
    # BUGFIX: commit BEFORE vacuuming.  The pending commit=False
    # INSERTs keep a transaction open, and sqlite refuses to run
    # VACUUM from within a transaction.
    #
    output.commit()
    output.execute("VACUUM;")
    #
    # Spit out per row statistics so we see how many rows we
    # can publish out of the total number of rows we have been
    # able to collect.
    #
    if total:
        sys.stdout.write("rows: %d/%d (%.02f%%)\n" % (can_share, total,
                         (100.0 * can_share)/total))
    else:
        sys.stdout.write("rows: 0/0 (0.0%)\n")
    #
    # Now tell the poor programmer what is the distribution
    # of privacy permissions one can find in the wild.
    #
    per_uuid = {}
    total_uuid = len(PERMS.values())
    for tpl in PERMS.values():
        if not tpl in per_uuid:
            per_uuid[tpl] = 0
        per_uuid[tpl] += 1
    if total_uuid:
        for perm in per_uuid:
            sys.stdout.write("perms: %s: %d/%d (%.02f%%)\n" % (perm,
                             per_uuid[perm], total_uuid,
                             (100.0 * per_uuid[perm])/total_uuid))
    else:
        sys.stdout.write("perms: N/A: 0/0 (0.0%)\n")
    sys.exit(0)