Ejemplo n.º 1
0
class StdIOInput(AbstractInput):
    """Input backed by a stream object (``sys.stdin`` unless told otherwise).

    ``read()`` hands the stream to a ``PassingFormatter`` and returns
    whatever the formatter produces.
    """

    # Stream to read from; replaced per instance in __init__.
    io = None

    def __init__(self, io=None):
        """Bind the input to a stream.

        Args:
            io: Object to read from. When omitted (or ``None``),
                ``sys.stdin`` is looked up at construction time.
        """
        # A ``sys.stdin`` default in the signature is evaluated once, when
        # the function is defined, so a later replacement of ``sys.stdin``
        # would be ignored. Resolve it lazily instead.
        self.io = sys.stdin if io is None else io
        self.formatter = PassingFormatter()

    def read(self):
        """Return the result of passing the stream through the formatter."""
        return self.formatter.format(self.io)
Ejemplo n.º 2
0
class MemInput(AbstractInput):
    """Input backed by an in-memory list."""

    # Backing list; replaced per instance in __init__.
    data = None

    def __init__(self, data=None):
        """Wrap a list of items.

        Args:
            data: List to use as backing storage (kept by reference, not
                copied). When omitted, a fresh empty list is created for
                this instance.
        """
        # A ``data=[]`` default would be one list shared by every instance
        # built without arguments; load_from_file() appends to it, so
        # successive loads would accumulate each other's lines.
        self.data = [] if data is None else data
        self.formatter = PassingFormatter()

    @staticmethod
    def load_from_file(filename):
        """Build a MemInput holding every line of ``filename``.

        Lines are stored as read (line terminators are not stripped).
        The elapsed load time is printed to standard output.

        Args:
            filename: Path of the file to load.

        Returns:
            A new MemInput whose ``data`` is the list of lines.
        """
        start_time = datetime.datetime.now()
        itself = MemInput()
        # ``with`` guarantees the handle is released even if reading fails.
        with open(filename) as f:
            itself.data.extend(f)
        # Single parenthesised argument: valid as a Python 2 print statement
        # and as a Python 3 print() call, with identical output.
        print("Load file %s in %s" % (filename, datetime.datetime.now() - start_time))
        return itself

    def to_file(self):
        """Dump the items to a new file under /var/tmp and return its path.

        Each item is written as its JSON encoding, back to back, with no
        separator between items.

        Returns:
            The path of the file that was written.
        """
        # NOTE(review): no newline is written between items, so the output
        # is not line-oriented -- confirm that is what readers expect.
        filename = '/var/tmp/%s' % str(uuid.uuid1())
        with open(filename, mode='w') as f:
            # Explicit loop: map() used for its side effects is lazy on
            # Python 3 and would silently write nothing.
            for item in self.data:
                f.write(json.dumps(item))
        return filename

    def read(self):
        """Return the formatter's view of an iterator over the items."""
        return self.formatter.format(iter(self.data))

    def sample(self, size=100):
        """Return the first ``size`` items (fewer if not enough are held).

        Args:
            size: Maximum number of items to return.
        """
        # The previous slice ``[0:size-1]`` returned one item too few.
        return self.data[:size]

    def close(self):
        """Do nothing; there is no resource to release."""
        pass

    def get_estimated_size(self):
        """Return ``(item_count, sample_input)``.

        ``sample_input`` is a new MemInput over the first 999 items when
        more than 1000 items are held, otherwise this instance itself.
        """
        if len(self.data) > 1000:
            sample_input = MemInput(self.data[:999])
        else:
            sample_input = self
        return len(self.data), sample_input

    def count(self, engine=None, debug=False, options=None):
        """Return the number of items held.

        ``engine``, ``debug`` and ``options`` are accepted but not used.
        """
        return len(self.data)
Ejemplo n.º 3
0
class FileInput(AbstractInput):
    """Input backed by a file on disk."""

    # Handle opened by read(); None until read() is called.
    file = None
    # Path of the backing file.
    filename = None

    def __init__(self, filename):
        """Remember the path of the backing file.

        Args:
            filename: Path of the file to read from.
        """
        self.filename = filename
        self.formatter = PassingFormatter()

    def to_file(self):
        """Return the path of the backing file."""
        return self.filename

    def read(self):
        """Open the file and return the formatter's view of it.

        The handle is kept in ``self.file`` so close() can release it.
        """
        self.file = open(self.filename)
        return self.formatter.format(self.file)

    def close(self):
        """Close the handle opened by read(), if there is one."""
        if self.file is not None:
            self.file.close()

    def as_output(self):
        """Return a FileOutput built on the same filename."""
        return FileOutput(self.filename)

    def get_estimated_size(self, sample_size=1000):
        """Estimate the number of lines in the file.

        Reads up to ``sample_size`` lines, computes their average length
        and scales it to the file size reported by ``os.stat``.

        Args:
            sample_size: Maximum number of lines to read for the estimate.

        Returns:
            The estimated line count as an int; 0 when no data was read.
        """
        file_size = os.stat(self.filename).st_size
        count = 0
        line_size = 0
        with open(self.filename) as f:
            for line in f:
                count += 1
                line_size += len(line)
                if count >= sample_size:
                    break

        # Empty file: nothing to extrapolate from, and dividing would raise
        # ZeroDivisionError.
        if line_size == 0:
            return 0

        # Scale by the number of lines actually read. Using sample_size here
        # overestimates any file shorter than the sample.
        return int(count * float(file_size) / float(line_size))

    def sample(self, size=100):
        """Return the formatter's view of the first ``size`` lines.

        Args:
            size: Maximum number of lines to include.
        """
        rows = []
        # Use a private handle that is always closed, and leave self.file
        # (owned by read()) untouched.
        with open(self.filename) as f:
            for row in f:
                # Check before appending so that at most ``size`` rows are
                # collected (the previous loop collected size + 1).
                if len(rows) >= size:
                    break
                rows.append(row)

        return self.formatter.format(rows)

    def count(self, engine=None, debug=False, options=None):
        """Run a ``Count`` job over this input and return its result.

        Args:
            engine, debug: Forwarded unchanged to ``Count().run``.
            options: Forwarded to ``Count().run``; a fresh dict is used
                when omitted.

        Returns:
            ``out.data[0][1][0]`` of the MemOutput the job wrote to.
        """
        # Fresh dict per call: a ``{}`` default would be shared between
        # calls if run() ever mutated it.
        if options is None:
            options = {}
        out = MemOutput()
        Count().run(self, out, engine, debug, options)
        return out.data[0][1][0]

    def compute(self, mapred, engine=None, debug=False, options=None):
        """Run ``mapred`` over this input and return the collected data.

        Args:
            mapred: Object whose ``run`` method is called with this input
                and a fresh MemOutput.
            engine, debug: Forwarded unchanged to ``mapred.run``.
            options: Forwarded to ``mapred.run``; a fresh dict is used
                when omitted.

        Returns:
            The ``data`` attribute of the MemOutput after the run.
        """
        if options is None:
            options = {}
        out = MemOutput()
        mapred.run(self, out, engine, debug, options)
        return out.data

    def map_reduce(self, mapred, output, engine=None, debug=False, options=None):
        """Run ``mapred`` over this input, writing to ``output``.

        Args:
            mapred: Object whose ``run`` method is called.
            output: Destination handed to ``mapred.run``.
            engine, debug: Forwarded unchanged to ``mapred.run``.
            options: Forwarded to ``mapred.run``; a fresh dict is used
                when omitted.
        """
        if options is None:
            options = {}
        mapred.run(self, output, engine, debug, options)

    def filter(self, filter_function, output=None, engine=None, debug=False, options=None):
        """Run a ``Filter`` job with ``filter_function`` over this input.

        Args:
            filter_function: Function installed on the Filter via
                ``set_function``.
            output: Destination for the job. When omitted, this input's
                as_output() is used with its filename redirected to a new
                path under /var/tmp.
            engine, debug: Forwarded unchanged to the job's ``run``.
            options: Forwarded to the job's ``run``; a fresh dict is used
                when omitted.

        Returns:
            ``output.as_input()``.
        """
        if options is None:
            options = {}
        if output is None:
            output = self.as_output()
            # NOTE(review): relies on a bare ``uuid1`` name being imported
            # at file level -- confirm.
            output.filename = '/var/tmp/%s' % uuid1()

        mapred = Filter()
        mapred.set_function(filter_function)
        mapred.run(self, output, engine, debug, options)

        return output.as_input()
0
 def __init__(self, filename):
     """Store the backing file's path and attach a PassingFormatter."""
     self.formatter = PassingFormatter()
     self.filename = filename
Ejemplo n.º 5
0
 def __init__(self, data=None):
     """Wrap a list of items.

     Args:
         data: List to use as backing storage (kept by reference, not
             copied). When omitted, a fresh empty list is created for
             this instance.
     """
     # A ``data=[]`` default is created once and shared by every instance
     # built without arguments, so appends through one instance would
     # show up in all the others.
     self.data = [] if data is None else data
     self.formatter = PassingFormatter()
Ejemplo n.º 6
0
 def __init__(self, io=None):
     """Bind the input to a stream.

     Args:
         io: Object to read from. When omitted (or ``None``),
             ``sys.stdin`` is looked up at construction time.
     """
     # A ``sys.stdin`` default in the signature is evaluated once, when the
     # function is defined, so a later replacement of ``sys.stdin`` would
     # be ignored. Resolve it lazily instead.
     self.io = sys.stdin if io is None else io
     self.formatter = PassingFormatter()