Beispiel #1
0
 def add_sql_table(self, stmt):
     """Build a table from a SQL CREATE statement and add it to this schema.

     The statement is handed to sql_parse_create(). The table name is read
     from the parsed result under schema.SQL_TABLE and the column definitions
     under schema.SQL_COLUMNS. Each column definition supplies the 'vtype'
     and 'name' used to construct one Variable.
     """
     parsed = sql_parse_create(stmt)
     new_table = Table(name=parsed[schema.SQL_TABLE])
     for column in parsed[schema.SQL_COLUMNS]:
         new_table.add_variable(
             Variable(vtype=column['vtype'], name=column['name']))
     self.add_table(new_table)
Beispiel #2
0
    def load_schema_from_file(self, filename):
        """Infer one table's schema from a data file and add it to this schema.

        The table is named after the file's base name with the extension
        removed and every "-" replaced by "_". One Variable is created per
        column of the first record produced by self.get_pandas_file_reader(),
        with its vtype derived from the type of that record's value.

        If the reader yields no records, no table is added.

        Raises:
            RuntimeError: if filename ends with ".docx" or ".xlsx".
        """
        # Report the extension that actually matched, so an .xlsx file is
        # not described as a .docx file in the error message.
        for unsupported in (".docx", ".xlsx"):
            if filename.endswith(unsupported):
                raise RuntimeError(
                    "Schema cannot read {} files; you probably want to use CensusSpec"
                    .format(unsupported)
                )

        # Make a table
        table_name = os.path.splitext(os.path.basename(filename))[0]
        table_name = table_name.replace("-", "_")
        table = Table(name=table_name)
        table.filename = filename
        table.add_comment("Parsed from {}".format(filename))

        # Load the schema from the data file.
        # The reader is asked for one-record chunks; only the first record
        # seen is inspected, then the table is registered and we stop.
        for chunk in self.get_pandas_file_reader(filename, chunksize=1):
            for row in chunk.to_dict(orient='records'):
                for colName in chunk.columns:
                    v = Variable()
                    v.set_name(colName)
                    v.set_vtype(vtype_for_numpy_type(type(row[colName])))
                    table.add_variable(v)
                self.add_table(table)
                return
Beispiel #3
0
 def get_table(self, name, create=False):
     """Return the table stored in self.tabledict under `name`.

     When the lookup fails and `create` is true, a new Table with that name
     is built, passed to add_table(), and returned. Otherwise the miss is
     logged together with self.table_names() and KeyError is raised.
     """
     try:
         return self.tabledict[name]
     except KeyError:
         if not create:
             logging.error("Table {} requested; current tables: {}".format(
                 name, self.table_names()))
             raise KeyError("Table {} does not exist".format(name))
         fresh = Table(name=name)
         self.add_table(fresh)
         return fresh
Beispiel #4
0
 def add_table_named(self, *, name, **kwargs):
     """Construct a Table from `name` plus any extra keyword arguments and add it.

     Returns whatever add_table() returns.
     """
     new_table = Table(name=name, **kwargs)
     return self.add_table(new_table)
Beispiel #5
0
def test_Table():
    """Exercise Table: variables, SQL generation, line parsing, and recodes."""
    db = Schema()
    students = Table(name="students")
    db.add_table(students)

    # Build the two variables and attach them to the students table.
    name_var = Variable(name="name", vtype='VARCHAR(4)', column=0, width=4)
    assert name_var.python_type == str
    age_var = Variable(name="age", vtype='INTEGER(2)', column=4, width=2)
    students.add_variable(name_var)
    students.add_variable(age_var)

    # The constructor arguments must be reflected on the variables.
    assert name_var.column == 0
    assert name_var.width == 4
    assert age_var.column == 4
    assert age_var.width == 2

    # Lookup by name and iteration order.
    assert students.get_variable("name") == name_var
    assert students.get_variable("age") == age_var
    assert list(students.vars()) == [name_var, age_var]

    # The generated SQL should mention the table and both columns.
    ddl = students.sql_schema()
    assert "CREATE TABLE students" in ddl
    assert "name VARCHAR" in ddl
    assert "age INTEGER" in ddl

    # Parse the sample line both as a dict and as a row.
    record = students.parse_line_to_dict(DATALINE1)
    assert record == {"name": "jack", "age": 10}
    assert students.parse_line_to_row(DATALINE1) == ["jack", 10]

    # Register a second table with a single variable.
    parents = Table(name="parents")
    db.add_table(parents)
    parents.add_variable(Variable(name="parent", vtype=schema.TYPE_VARCHAR))

    # Declare three recodes targeting the parents table, then compile them.
    db.add_recode("recode1", schema.TYPE_VARCHAR,
                  "parents[studentname]=students[name]")
    db.add_recode("recode2", schema.TYPE_INTEGER, "parents[three]=3")
    db.add_recode("recode3", schema.TYPE_VARCHAR,
                  "parents[student_initials]=students[name][0:1]")
    db.compile_recodes()

    # After compiling, parents must have a studentname variable of the right type.
    studentname_var = db.get_table("parents").get_variable("studentname")
    assert studentname_var.name == "studentname"
    assert db.get_table("parents").get_variable("studentname").vtype == schema.TYPE_VARCHAR

    # Provide the parsed student record as recode input.
    db.recode_load_data("students", record)

    # Run the recodes against a parent record.
    parent_row = {"name": "xxxx"}
    db.recode_execute("parents", parent_row)

    # The parent record should now carry the recoded values.
    assert parent_row['studentname'] == 'jack'
    assert parent_row['three'] == 3
    assert parent_row['student_initials'] == 'j'