Example #1
0
    def build(self):
        orig_bundle = self.library.require('test') # original data
        dim_bundle = self.library.require('dim') # dimension table from orig
        
        sink = self.database.path

        tags = (petl.fromsqlite3(dim_bundle.database.path, "SELECT * FROM tags"))
        flags = (petl.fromsqlite3(dim_bundle.database.path, "SELECT * FROM flags"))
        
        orig = (petl.fromsqlite3(orig_bundle.database.path, "SELECT * FROM example"))
        
        print petl.look(tags)
        print petl.look(flags)
     
        fact = (orig
            .hashjoin(tags,key='tag').cutout('tag')
            .hashjoin(flags,key='flags').cutout('flags'))
        
        print petl.look(fact)
        
        fact.tosqlite3(sink, 'example', create=False)
      
        self.log("Wrote fact table to: "+self.database.path)
      
        return True
Example #2
0
    def _do_query_extract(self,  extract_data):
        """Extract a CSV file and  upload it to CKAN"""
        import tempfile
        import uuid
        import os

        p = extract_data['_partition'] # Set in _make_partition_dict
     
        file_name = extract_data.get('name', None)
        
        if file_name:
            file_ = self.bundle.filesystem.path('extracts', file_name)
        else:
            file_ =  os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) )

    
        self.bundle.log("Extracting {} to {}".format(extract_data['title'],file_))

        petlf.fromsqlite3(p.database.path, extract_data['query'] ).tocsv(file_) #@UndefinedVariable
  
        return file_       
Example #3
0
    def build(self):
        test_bundle = self.library.require('test')
        source_db_path = test_bundle.database.path
        
        sink_db_path = self.database.path
        self.log("Processing original: "+source_db_path)
        table = (petl.fromsqlite3(source_db_path, "SELECT * FROM example")
                 .convertnumbers())
   
        tags = (table.valuecounts('tag')
                .cut('value')
                .addrownumbers(10)
                .rename('value','tag')
                .rename('row','tag_id'))
        
        tags.tosqlite3(sink_db_path, 'tags')
        
        # Save again for the parition. 
        part = self.partitions.find(table='tags')
        tags.tosqlite3(part.database.path, 'tags')
        
        flags = (table.valuecounts('flags')
                .cut('value')
                .addrownumbers(10)
                .rename('value','flags')
                .rename('row','flags_id')) 


        # Testing new petl code. 
        #dummy, flags = table.unjoin('flags',autoincrement=(10, 1))
        #print petl.look(flags)

        flags.tosqlite3(sink_db_path, 'flags')
     
         # Save again for the parition. 
        part = self.partitions.find(table='flags')
        tags.tosqlite3(part.database.path, 'flags')
        
        self.log('Wrote dimension tables to: '+sink_db_path)
        
        return True