def test_Table():
    """Exercise Table/Variable basics, SQL generation, line parsing and recodes."""
    sch = Schema()
    students = Table(name="students")
    sch.add_table(students)

    # Two fixed-width variables: "name" in columns 0-3, "age" in columns 4-5.
    name_var = Variable(name="name", vtype='VARCHAR(4)', column=0, width=4)
    assert name_var.python_type == str
    age_var = Variable(name="age", vtype='INTEGER(2)', column=4, width=2)
    students.add_variable(name_var)
    students.add_variable(age_var)

    assert (name_var.column, name_var.width) == (0, 4)
    assert (age_var.column, age_var.width) == (4, 2)
    assert students.get_variable("name") == name_var
    assert students.get_variable("age") == age_var
    assert list(students.vars()) == [name_var, age_var]

    # The generated SQL must mention the table and both columns.
    create_stmt = students.sql_schema()
    assert "CREATE TABLE students" in create_stmt
    assert "name VARCHAR" in create_stmt
    assert "age INTEGER" in create_stmt

    # Both line parsers must agree on the contents of DATALINE1.
    student_record = students.parse_line_to_dict(DATALINE1)
    assert student_record == {"name": "jack", "age": 10}
    assert students.parse_line_to_row(DATALINE1) == ["jack", 10]

    # A second table, which the recodes below write into.
    parents = Table(name="parents")
    sch.add_table(parents)
    parents.add_variable(Variable(name="parent", vtype=schema.TYPE_VARCHAR))

    # Register the recodes and compile them.
    recodes = (
        ("recode1", schema.TYPE_VARCHAR, "parents[studentname]=students[name]"),
        ("recode2", schema.TYPE_INTEGER, "parents[three]=3"),
        ("recode3", schema.TYPE_VARCHAR, "parents[student_initials]=students[name][0:1]"),
    )
    for recode_name, recode_vtype, recode_stmt in recodes:
        sch.add_recode(recode_name, recode_vtype, recode_stmt)
    sch.compile_recodes()

    # After compiling, the parents table has a studentname variable of the
    # expected type.
    assert sch.get_table("parents").get_variable("studentname").name == "studentname"
    assert sch.get_table("parents").get_variable("studentname").vtype == schema.TYPE_VARCHAR

    # Load a student record, then run the recodes against a parent record.
    sch.recode_load_data("students", student_record)
    parent_record = {"name": "xxxx"}
    sch.recode_execute("parents", parent_record)

    # The recoded values must now be present in the parent record.
    assert parent_record['studentname'] == 'jack'
    assert parent_record['three'] == 3
    assert parent_record['student_initials'] == 'j'
def load_schema_from_file(self, filename):
    """Create a table whose variables mirror the columns of a data file.

    The table name is the file's base name without its extension, with
    every "-" replaced by "_".  Only the first record produced by
    self.get_pandas_file_reader() is inspected: each column becomes a
    Variable named after the column, with the vtype that
    vtype_for_numpy_type() returns for the type of that record's value.
    The table is then added to this schema.

    If the reader produces no records, no table is added.

    filename -- path of the data file to inspect.

    Raises RuntimeError if filename ends with ".docx" or ".xlsx".
    """
    # Reject the unsupported formats up front; the message names the
    # extension that was actually rejected.
    for extension in (".docx", ".xlsx"):
        if filename.endswith(extension):
            raise RuntimeError(
                "Schema cannot read {} files; you probably want to use CensusSpec"
                .format(extension)
            )

    # Make a table named after the file.
    table_name = os.path.splitext(os.path.split(filename)[1])[0]
    table_name = table_name.replace("-", "_")
    table = Table(name=table_name)
    table.filename = filename
    table.add_comment("Parsed from {}".format(filename))

    # Load the schema from the data file.
    # This uses pandas to read a single record: we return as soon as the
    # first record has been turned into variables.
    for chunk in self.get_pandas_file_reader(filename, chunksize=1):
        for row in chunk.to_dict(orient='records'):
            for colName in chunk.columns:
                v = Variable()
                v.set_name(colName)
                v.set_vtype(vtype_for_numpy_type(type(row[colName])))
                table.add_variable(v)
            self.add_table(table)
            return
def add_sql_table(self, stmt):
    """Add the table described by a SQL CREATE statement to this schema.

    The statement is handed to sql_parse_create().  The parsed result is
    indexed by schema.SQL_TABLE for the table name and by
    schema.SQL_COLUMNS for the column definitions; each column definition
    supplies the 'vtype' and 'name' used to build a Variable.
    """
    parsed = sql_parse_create(stmt)
    new_table = Table(name=parsed[schema.SQL_TABLE])
    for column in parsed[schema.SQL_COLUMNS]:
        new_table.add_variable(
            Variable(vtype=column['vtype'], name=column['name'])
        )
    self.add_table(new_table)
def get_table(self, name, create=False):
    """Return the table registered under name.

    When the table is missing and create is true, a new empty Table with
    that name is added to the schema and returned.  When it is missing
    and create is false, the request is logged as an error together with
    the current table names, and KeyError is raised.
    """
    try:
        return self.tabledict[name]
    except KeyError:
        if not create:
            logging.error("Table {} requested; current tables: {}".format(
                name, self.table_names()))
            raise KeyError("Table {} does not exist".format(name))
        new_table = Table(name=name)
        self.add_table(new_table)
        return new_table
def add_table_named(self, *, name, **kwargs):
    """Build a Table from the keyword arguments and add it to this schema.

    name and any extra keyword arguments are passed straight to the Table
    constructor.  Returns whatever self.add_table() returns.
    """
    new_table = Table(name=name, **kwargs)
    return self.add_table(new_table)