def test_find_table(self): tables = [ "author", "domain", "domain_author", "organization", "publication", "writes", ] relations = [ abstract_sql.ForeignKeyRelation( child_table="publication", parent_table="writes", child_column="pid", parent_column="pid", ), abstract_sql.ForeignKeyRelation( child_table="author", parent_table="writes", child_column="aid", parent_column="aid", ), abstract_sql.ForeignKeyRelation( child_table="author", parent_table="organization", child_column="oid", parent_column="oid", ), abstract_sql.ForeignKeyRelation( child_table="author", parent_table="domain_author", child_column="aid", parent_column="aid", ), abstract_sql.ForeignKeyRelation( child_table="domain", parent_table="domain_author", child_column="did", parent_column="did", ), ] from_clause_spans = abstract_sql._get_from_clause_for_tables(tables, relations) from_clause = abstract_sql.sql_spans_to_string(from_clause_spans, sep=" ") expected_from_clause = ( "author " "join domain_author on author.aid = domain_author.aid " "join domain on domain_author.did = domain.did " "join organization on author.oid = organization.oid " "join writes on author.aid = writes.aid " "join publication on writes.pid = publication.pid" ) self.assertEqual(expected_from_clause, from_clause)
def michigan_db_to_foreign_key_tuples(db): """Returns a list of abstract_sql.ForeignKeyRelation.""" # Michigan doesn't come with a gold standard of *which* columns are foreign # keys with one another. For now: assume that they're foreign keys if the # schema marks them as foreign keys and the column name is the same in both # tables. # TODO(alanesuhr): This is a bit hacky because there might be exceptions # (e.g., columns that are foreign keys with different names). foreign_keys = list() for table_name, columns in db.items(): for data in columns: could_have_link = schema_utils.column_is_foreign_key( data) or schema_utils.column_is_primary_key(data) column_name = data['field name'].lower() if could_have_link: for other_table_name, other_columns in db.items(): if table_name != other_table_name: for other_data in other_columns: other_column = other_data['field name'].lower() if other_column == column_name: if schema_utils.column_is_foreign_key( other_data ) or schema_utils.column_is_primary_key( other_data): foreign_keys.append( abstract_sql.ForeignKeyRelation( child_table=table_name.lower(), parent_table=other_table_name. lower(), child_column=column_name, parent_column=other_column)) return foreign_keys
def spider_db_to_foreign_key_tuples(db): """Return list of abstract_sql.ForiegnKeyRelation.""" # The format of the db json object is documented here: # https://github.com/taoyds/spider#tables # List of string table names. table_names = db['table_names_original'] # List of lists with [table_idx, column_name]. column_list = db['column_names_original'] foreign_keys = [] for foreign_key_idx_pair in db['foreign_keys']: child_column_id = foreign_key_idx_pair[0] parent_column_id = foreign_key_idx_pair[1] child_table_id = column_list[child_column_id][0] child_column_name = column_list[child_column_id][1].lower() parent_table_id = column_list[parent_column_id][0] parent_column_name = column_list[parent_column_id][1].lower() child_table_name = table_names[child_table_id].lower() parent_table_name = table_names[parent_table_id].lower() foreign_keys.append( abstract_sql.ForeignKeyRelation( child_table=child_table_name, parent_table=parent_table_name, child_column=child_column_name, parent_column=parent_column_name)) return foreign_keys
def test_restore_from_string(self): sql_spans = abstract_sql.sql_to_sql_spans(TEST_QUERY) replaced_spans = abstract_sql.replace_from_clause(sql_spans) replaced_sql = abstract_sql.sql_spans_to_string(replaced_spans) parsed_spans = abstract_sql.sql_to_sql_spans(replaced_sql) fk_relations = [ abstract_sql.ForeignKeyRelation("user_profiles", "follows", "uid", "f1") ] restored_spans = abstract_sql.restore_from_clause( parsed_spans, fk_relations=fk_relations ) restored_sql = abstract_sql.sql_spans_to_string(restored_spans) expected_restored_sql = "select user_profiles.name from follows join user_profiles on follows.f1 = user_profiles.uid group by follows.f1 having count ( * ) > ( select count ( * ) from follows join user_profiles on follows.f1 = user_profiles.uid where user_profiles.name = 'tyler swift' )" self.assertEqual(expected_restored_sql, restored_sql)
def michigan_db_to_foreign_key_tuples_orcale(dataset_name): """Returns a list of abstract_sql.ForeignKeyRelation.""" # Uses hand curated oracle foreign key annotations. if dataset_name == 'academic': return [ abstract_sql.ForeignKeyRelation('publication', 'writes', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('author', 'writes', 'aid', 'aid'), abstract_sql.ForeignKeyRelation('journal', 'publication', 'jid', 'jid'), abstract_sql.ForeignKeyRelation('conference', 'publication', 'cid', 'cid'), abstract_sql.ForeignKeyRelation('publication', 'publication_keyword', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('keyword', 'publication_keyword', 'kid', 'kid'), abstract_sql.ForeignKeyRelation('author', 'organization', 'oid', 'oid'), abstract_sql.ForeignKeyRelation('author', 'domain_author', 'aid', 'aid'), abstract_sql.ForeignKeyRelation('domain', 'domain_author', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain', 'domain_publication', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_publication', 'publication', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('cite', 'publication', 'cited', 'pid'), abstract_sql.ForeignKeyRelation('cite', 'publication', 'citing', 'pid'), abstract_sql.ForeignKeyRelation('domain', 'domain_keyword', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_keyword', 'keyword', 'kid', 'kid'), abstract_sql.ForeignKeyRelation('domain', 'domain_journal', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_journal', 'journal', 'jid', 'jid'), abstract_sql.ForeignKeyRelation('conference', 'domain_conference', 'cid', 'cid'), abstract_sql.ForeignKeyRelation('domain', 'domain_conference', 'did', 'did'), ] elif dataset_name == 'advising': return [ abstract_sql.ForeignKeyRelation('course', 'course_offering', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('course_offering', 'semester', 'semester', 'semester_id'), abstract_sql.ForeignKeyRelation('course', 'program_course', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('course_offering', 'offering_instructor', 'offering_id', 'offering_id'), abstract_sql.ForeignKeyRelation('instructor', 'offering_instructor', 'instructor_id', 'instructor_id'), abstract_sql.ForeignKeyRelation('course', 'course_prerequisite', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('course', 'course_prerequisite', 'course_id', 'pre_course_id'), abstract_sql.ForeignKeyRelation('course_offering', 'program_course', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('area', 'course', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('program', 'program_course', 'program_id', 'program_id'), abstract_sql.ForeignKeyRelation('course', 'student_record', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('offering_instructor', 'student_record', 'offering_id', 'offering_id'), abstract_sql.ForeignKeyRelation('area', 'student_record', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('gsi', 'student', 'student_id', 'student_id'), abstract_sql.ForeignKeyRelation('course_offering', 'gsi', 'offering_id', 'course_offering_id'), abstract_sql.ForeignKeyRelation('program_course', 'student_record', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('student', 'student_record', 'student_id', 'student_id'), abstract_sql.ForeignKeyRelation('offering_instructor', 'offering_instructor', 'offering_id', 'offering_id'), abstract_sql.ForeignKeyRelation('program', 'program_requirement', 'program_id', 'program_id'), abstract_sql.ForeignKeyRelation('course_offering', 'course_offering', 'semester', 'semester'), abstract_sql.ForeignKeyRelation('semester', 'student_record', 'semester_id', 'semester'), abstract_sql.ForeignKeyRelation('area', 'course_offering', 'course_id', 'course_id'), abstract_sql.ForeignKeyRelation('course', 'course', 'course_id', 'course_id'), ] elif dataset_name == 'atis': return [ abstract_sql.ForeignKeyRelation('airport_service', 'city', 'city_code', 'city_code'), abstract_sql.ForeignKeyRelation('airport_service', 'flight', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'flight', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('date_day', 'days', 'day_name', 'day_name'), abstract_sql.ForeignKeyRelation('days', 'flight', 'days_code', 'flight_days'), abstract_sql.ForeignKeyRelation('fare', 'flight_fare', 'fare_id', 'fare_id'), abstract_sql.ForeignKeyRelation('flight', 'flight_fare', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('fare', 'fare_basis', 'fare_basis_code', 'fare_basis_code'), abstract_sql.ForeignKeyRelation('flight', 'flight_stop', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('days', 'fare_basis', 'days_code', 'basis_days'), abstract_sql.ForeignKeyRelation('airport_service', 'flight_stop', 'airport_code', 'stop_airport'), abstract_sql.ForeignKeyRelation('city', 'ground_service', 'city_code', 'city_code'), abstract_sql.ForeignKeyRelation('airline', 'flight', 'airline_code', 'airline_code'), abstract_sql.ForeignKeyRelation('airport', 'airport_service', 'airport_code', 'airport_code'), abstract_sql.ForeignKeyRelation('flight', 'food_service', 'meal_code', 'meal_code'), abstract_sql.ForeignKeyRelation('aircraft', 'equipment_sequence', 'aircraft_code', 'aircraft_code'), abstract_sql.ForeignKeyRelation('equipment_sequence', 'flight', 'aircraft_code_sequence', 'aircraft_code_sequence'), abstract_sql.ForeignKeyRelation('city', 'state', 'state_code', 'state_code'), abstract_sql.ForeignKeyRelation('airport', 'flight', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('airport', 'ground_service', 'airport_code', 'airport_code'), abstract_sql.ForeignKeyRelation('airport', 'flight', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'fare', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'fare', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('flight', 'flight_leg', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('flight', 'flight_leg', 'flight_id', 'leg_flight'), abstract_sql.ForeignKeyRelation('class_of_service', 'fare_basis', 'booking_class', 'booking_class'), abstract_sql.ForeignKeyRelation('airport', 'state', 'state_code', 'state_code'), abstract_sql.ForeignKeyRelation('airport', 'flight_stop', 'airport_code', 'stop_airport'), abstract_sql.ForeignKeyRelation('fare', 'restriction', 'restriction_code', 'restriction_code'), ] elif dataset_name == 'imdb': return [ abstract_sql.ForeignKeyRelation('actor', 'cast', 'aid', 'aid'), abstract_sql.ForeignKeyRelation('cast', 'movie', 'msid', 'mid'), abstract_sql.ForeignKeyRelation('directed_by', 'director', 'did', 'did'), abstract_sql.ForeignKeyRelation('directed_by', 'movie', 'msid', 'mid'), abstract_sql.ForeignKeyRelation('company', 'copyright', 'id', 'cid'), abstract_sql.ForeignKeyRelation('copyright', 'movie', 'msid', 'mid'), abstract_sql.ForeignKeyRelation('keyword', 'tags', 'id', 'kid'), abstract_sql.ForeignKeyRelation('movie', 'tags', 'mid', 'msid'), abstract_sql.ForeignKeyRelation('classification', 'movie', 'msid', 'mid'), abstract_sql.ForeignKeyRelation('made_by', 'producer', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('classification', 'genre', 'gid', 'gid'), abstract_sql.ForeignKeyRelation('movie', 'written_by', 'mid', 'msid'), abstract_sql.ForeignKeyRelation('made_by', 'movie', 'msid', 'mid'), abstract_sql.ForeignKeyRelation('writer', 'written_by', 'wid', 'wid'), abstract_sql.ForeignKeyRelation('copyright', 'tv_series', 'msid', 'sid'), abstract_sql.ForeignKeyRelation('cast', 'tv_series', 'msid', 'sid'), abstract_sql.ForeignKeyRelation('directed_by', 'tv_series', 'msid', 'sid'), abstract_sql.ForeignKeyRelation('made_by', 'tv_series', 'msid', 'sid'), ] elif dataset_name == 'geoquery': return [ abstract_sql.ForeignKeyRelation('border_info', 'state', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('river', 'state', 'traverse', 'state_name'), abstract_sql.ForeignKeyRelation('city', 'state', 'city_name', 'capital'), abstract_sql.ForeignKeyRelation('border_info', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('city', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'river', 'border', 'traverse'), abstract_sql.ForeignKeyRelation('highlow', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'border_info', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('highlow', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('border_info', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('city', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('border_info', 'highlow', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'city', 'border', 'state_name'), ] elif dataset_name == 'restaurants': return [ abstract_sql.ForeignKeyRelation('location', 'restaurant', 'restaurant_id', 'id'), abstract_sql.ForeignKeyRelation('geographic', 'restaurant', 'city_name', 'city_name'), ] elif dataset_name == 'scholar': return [ abstract_sql.ForeignKeyRelation('author', 'writes', 'authorid', 'authorid'), abstract_sql.ForeignKeyRelation('paper', 'writes', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('keyphrase', 'paperkeyphrase', 'keyphraseid', 'keyphraseid'), abstract_sql.ForeignKeyRelation('paper', 'paperkeyphrase', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('paper', 'venue', 'venueid', 'venueid'), abstract_sql.ForeignKeyRelation('cite', 'paper', 'citedpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('dataset', 'paperdataset', 'datasetid', 'datasetid'), abstract_sql.ForeignKeyRelation('writes', 'writes', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('paper', 'paperdataset', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('paperdataset', 'paperkeyphrase', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('paperkeyphrase', 'writes', 'paperid', 'paperid'), abstract_sql.ForeignKeyRelation('cite', 'writes', 'citedpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('cite', 'writes', 'citingpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('cite', 'paper', 'citingpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('journal', 'paper', 'journalid', 'journalid'), abstract_sql.ForeignKeyRelation('field', 'paperfield', 'fieldid', 'fieldid'), abstract_sql.ForeignKeyRelation('cite', 'paperkeyphrase', 'citingpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('cite', 'paperkeyphrase', 'citedpaperid', 'paperid'), abstract_sql.ForeignKeyRelation('paper', 'paperfield', 'paperid', 'paperid'), ] elif dataset_name == 'yelp': return [ abstract_sql.ForeignKeyRelation('business', 'category', 'business_id', 'business_id'), abstract_sql.ForeignKeyRelation('review', 'user', 'user_id', 'user_id'), abstract_sql.ForeignKeyRelation('business', 'review', 'business_id', 'business_id'), abstract_sql.ForeignKeyRelation('business', 'neighborhood', 'business_id', 'business_id'), abstract_sql.ForeignKeyRelation('tip', 'user', 'user_id', 'user_id'), abstract_sql.ForeignKeyRelation('business', 'tip', 'business_id', 'business_id'), abstract_sql.ForeignKeyRelation('business', 'checkin', 'business_id', 'business_id'), ] else: raise ValueError('Unknown dataset: %s' % dataset_name)
def michigan_db_to_foreign_key_tuples_orcale(dataset_name): """Returns a list of abstract_sql.ForeignKeyRelation.""" # Uses hand curated oracle foreign key annotations. if dataset_name == 'academic': return [ abstract_sql.ForeignKeyRelation('publication', 'writes', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('author', 'writes', 'aid', 'aid'), abstract_sql.ForeignKeyRelation('journal', 'publication', 'jid', 'jid'), abstract_sql.ForeignKeyRelation('conference', 'publication', 'cid', 'cid'), abstract_sql.ForeignKeyRelation('publication', 'publication_keyword', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('keyword', 'publication_keyword', 'kid', 'kid'), abstract_sql.ForeignKeyRelation('author', 'organization', 'oid', 'oid'), abstract_sql.ForeignKeyRelation('author', 'domain_author', 'aid', 'aid'), abstract_sql.ForeignKeyRelation('domain', 'domain_author', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain', 'domain_publication', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_publication', 'publication', 'pid', 'pid'), abstract_sql.ForeignKeyRelation('cite', 'publication', 'cited', 'pid'), abstract_sql.ForeignKeyRelation('cite', 'publication', 'citing', 'pid'), abstract_sql.ForeignKeyRelation('domain', 'domain_keyword', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_keyword', 'keyword', 'kid', 'kid'), abstract_sql.ForeignKeyRelation('domain', 'domain_journal', 'did', 'did'), abstract_sql.ForeignKeyRelation('domain_journal', 'journal', 'jid', 'jid'), abstract_sql.ForeignKeyRelation('conference', 'domain_conference', 'cid', 'cid'), abstract_sql.ForeignKeyRelation('domain', 'domain_conference', 'did', 'did'), ] elif dataset_name == 'atis': return [ abstract_sql.ForeignKeyRelation('airport_service', 'city', 'city_code', 'city_code'), abstract_sql.ForeignKeyRelation('airport_service', 'flight', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'flight', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('date_day', 'days', 'day_name', 'day_name'), abstract_sql.ForeignKeyRelation('days', 'flight', 'days_code', 'flight_days'), abstract_sql.ForeignKeyRelation('fare', 'flight_fare', 'fare_id', 'fare_id'), abstract_sql.ForeignKeyRelation('flight', 'flight_fare', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('fare', 'fare_basis', 'fare_basis_code', 'fare_basis_code'), abstract_sql.ForeignKeyRelation('flight', 'flight_stop', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('days', 'fare_basis', 'days_code', 'basis_days'), abstract_sql.ForeignKeyRelation('airport_service', 'flight_stop', 'airport_code', 'stop_airport'), abstract_sql.ForeignKeyRelation('city', 'ground_service', 'city_code', 'city_code'), abstract_sql.ForeignKeyRelation('airline', 'flight', 'airline_code', 'airline_code'), abstract_sql.ForeignKeyRelation('airport', 'airport_service', 'airport_code', 'airport_code'), abstract_sql.ForeignKeyRelation('flight', 'food_service', 'meal_code', 'meal_code'), abstract_sql.ForeignKeyRelation('aircraft', 'equipment_sequence', 'aircraft_code', 'aircraft_code'), abstract_sql.ForeignKeyRelation('equipment_sequence', 'flight', 'aircraft_code_sequence', 'aircraft_code_sequence'), abstract_sql.ForeignKeyRelation('city', 'state', 'state_code', 'state_code'), abstract_sql.ForeignKeyRelation('airport', 'flight', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('airport', 'ground_service', 'airport_code', 'airport_code'), abstract_sql.ForeignKeyRelation('airport', 'flight', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'fare', 'airport_code', 'to_airport'), abstract_sql.ForeignKeyRelation('airport_service', 'fare', 'airport_code', 'from_airport'), abstract_sql.ForeignKeyRelation('flight', 'flight_leg', 'flight_id', 'flight_id'), abstract_sql.ForeignKeyRelation('flight', 'flight_leg', 'flight_id', 'leg_flight'), abstract_sql.ForeignKeyRelation('class_of_service', 'fare_basis', 'booking_class', 'booking_class'), abstract_sql.ForeignKeyRelation('airport', 'state', 'state_code', 'state_code'), abstract_sql.ForeignKeyRelation('airport', 'flight_stop', 'airport_code', 'stop_airport'), abstract_sql.ForeignKeyRelation('fare', 'restriction', 'restriction_code', 'restriction_code'), ] elif dataset_name == 'geography': return [ abstract_sql.ForeignKeyRelation('border_info', 'state', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('river', 'state', 'traverse', 'state_name'), abstract_sql.ForeignKeyRelation('city', 'state', 'city_name', 'capital'), abstract_sql.ForeignKeyRelation('border_info', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('city', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'river', 'border', 'traverse'), abstract_sql.ForeignKeyRelation('highlow', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'border_info', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('highlow', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('border_info', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('city', 'river', 'state_name', 'traverse'), abstract_sql.ForeignKeyRelation('border_info', 'highlow', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'city', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('state', 'state', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('border_info', 'border_info', 'border', 'border'), abstract_sql.ForeignKeyRelation('city', 'highlow', 'state_name', 'state_name'), abstract_sql.ForeignKeyRelation('highlow', 'state', 'highest_point', 'capital'), abstract_sql.ForeignKeyRelation('border_info', 'lake', 'border', 'state_name'), abstract_sql.ForeignKeyRelation('river', 'river', 'river_name', 'river_name'), ] elif dataset_name == 'restaurants': return [ abstract_sql.ForeignKeyRelation('location', 'restaurant', 'restaurant_id', 'id'), abstract_sql.ForeignKeyRelation('geographic', 'restaurant', 'city_name', 'city_name'), ] else: raise ValueError('Unknown dataset: %s' % dataset_name)