def main(data_folder):
    """Rebuild ``database.db`` in ``data_folder`` from the ``.txt`` files there.

    Steps performed:

    1. Every regular file in ``data_folder`` with a ``.db`` extension is
       deleted.
    2. A new SQLite database ``database.db`` is created in ``data_folder``.
    3. Each regular ``.txt`` file is read as delimited text with a header row
       (the dialect is sniffed from the first 1024 bytes) and loaded into a
       table named after the file, without its extension. Columns listed in
       ``__real_columns`` / ``__integer_columns`` are declared ``REAL`` /
       ``INTEGER``; all other columns are declared without a type.
    4. When a file has both an ``arrival_time`` and a ``departure_time``
       column, two extra columns ``arrival_time_sec`` and
       ``departure_time_sec`` are added and filled with the result of
       ``utils.convert_time_to_sec``.
    5. A fixed set of indexes on the ``stops``, ``stop_times`` and
       ``transfers`` tables is created through ``_execute``.

    The connection is always closed, even when loading fails part-way.

    Args:
        data_folder: Path of the directory holding the ``.txt`` input files;
            the database file is written into the same directory.

    Note:
        Table and column names are taken verbatim from file names and header
        rows and interpolated into the SQL text (identifiers cannot be bound
        as parameters), so the input folder must be trusted.
    """
    logger.info("Creating database in {0}".format(data_folder))

    # Scan the folder once and sort the regular files by extension.
    txt_files = []
    db_files = []
    for name in listdir(data_folder):
        if not isfile(join(data_folder, name)):
            continue
        extension = splitext(name)[1]
        if extension == ".txt":
            txt_files.append(name)
        elif extension == ".db":
            db_files.append(name)

    # delete all existing database files
    for name in db_files:
        stale_path = join(data_folder, name)
        logger.info("Deleting existing database file: {0}".format(stale_path))
        remove(stale_path)

    # Source column -> derived column holding the converted time value.
    search_fields = {"arrival_time": "arrival_time_sec",
                     "departure_time": "departure_time_sec"}

    index_statements = (
        "CREATE UNIQUE INDEX stops_stop_id_index ON stops (stop_id)",
        "CREATE INDEX stop_times_stop_id_index ON stop_times (stop_id )",
        "CREATE INDEX stop_times_trip_id_index ON stop_times (trip_id )",
        "CREATE INDEX transfers_transfer_time_index ON transfers (min_transfer_time)",
        "CREATE INDEX transfers_transfer_from_stop_id_index ON transfers (from_stop_id)",
        "CREATE INDEX stops_stop_name_idx ON stops (stop_name)",
    )

    # create new file
    database_path = join(data_folder, "database.db")
    logger.info("Creating database file: {0}".format(database_path))
    con = sqlite3.connect(database_path)
    try:
        cur = con.cursor()

        for name in txt_files:
            file_path = join(data_folder, name)
            logger.info("Processing file: {0}".format(file_path))

            # we use the file name as table name
            table_name = splitext(name)[0]

            # NOTE: binary mode plus the ``unicode`` calls below are the
            # Python 2 way of feeding the csv module; kept unchanged.
            with open(file_path, 'rb') as csvfile:
                # try to determine the csv dialect
                dialect = Sniffer().sniff(csvfile.read(1024))
                csvfile.seek(0)
                reader = DictReader(csvfile, dialect=dialect)

                # this will be our fields (columns)
                field_names = reader.fieldnames
                # ``fieldnames`` is None when no header line could be read,
                # so test truthiness instead of calling len() on it.
                if not field_names:
                    logger.error("No header read from file: {0}. Ignoring this file.".format(file_path))
                    continue

                # Only add the derived columns when *both* time columns exist.
                # This extends the list handed out by the reader in place, as
                # the original code did.
                if set(search_fields).issubset(field_names):
                    field_names.extend(search_fields.values())

                fields_in_statement = []
                for field in field_names:
                    if field in __real_columns:
                        fields_in_statement.append(field + " REAL")
                    elif field in __integer_columns:
                        fields_in_statement.append(field + " INTEGER")
                    else:
                        fields_in_statement.append(field)

                statement = "create table {0}({1});".format(table_name, ", ".join(fields_in_statement))
                _execute(cur, statement)

                # The insert statement is the same for every row of this
                # file, so build it once instead of once per row.
                placeholders = ", ".join(["?"] * len(field_names))
                insert_statement = "insert into {0} values ({1});".format(table_name, placeholders)

                count = 0
                for row in reader:
                    for source, target in search_fields.items():
                        if source in row:
                            row[target] = utils.convert_time_to_sec(unicode(row[source], 'utf-8'))

                    # str() first: the derived time values are not
                    # necessarily strings.
                    data = [unicode(str(row[n]), 'utf-8') for n in field_names]
                    cur.execute(insert_statement, data)
                    count += 1

                logger.info("Read {0} rows into {1}".format(count, table_name))

            con.commit()

        for statement in index_statements:
            _execute(cur, statement)

        con.commit()
    finally:
        # Release the database file even if a file failed to load.
        con.close()

    logger.info("Done.")
def travel_time(self, time):
    """Store ``time`` on the instance after conversion.

    The value is passed through ``convert_time_to_sec`` and the result is
    kept in ``self._travel_time``.

    NOTE(review): this looks like a property setter whose decorator is not
    visible here -- confirm against the enclosing class.
    """
    converted = convert_time_to_sec(time)
    self._travel_time = converted
def time_window(self, time_window):
    """Store ``time_window`` on the instance after conversion.

    The value is passed through ``convert_time_to_sec`` and the result is
    kept in ``self._time_window_sec``.

    NOTE(review): this looks like a property setter whose decorator is not
    visible here -- confirm against the enclosing class.
    """
    converted = convert_time_to_sec(time_window)
    self._time_window_sec = converted
def start_time(self, time):
    """Store ``time`` on the instance after conversion.

    The value is passed through ``convert_time_to_sec`` and the result is
    kept in ``self._start_time_sec``.

    NOTE(review): this looks like a property setter whose decorator is not
    visible here -- confirm against the enclosing class.
    """
    converted = convert_time_to_sec(time)
    self._start_time_sec = converted