def __iter__(self):
    """Open the underlying resource and return ``self`` as the iterator.

    For a ``"URL"`` resource the remote content is first copied in chunks
    into a ``tempfile.NamedTemporaryFile`` (kept on ``self.tempfile``); for
    any other ``restype`` the path in ``self.res`` is opened directly.
    Depending on ``self.compression`` the data is then read through
    ``FastGzip`` (``"GZ"``), ``bz2.BZ2File`` (``"BZ2"``) or as a plain file.

    Side effects: sets ``self.resfile`` and ``self.nextobj`` (plus
    ``self.tempfile`` for URLs) and resets ``self.linecount`` to 0 so that
    a restarted iteration begins counting lines from the first line again.

    Returns:
        ``self``.
    """
    chunk_size = 1024 * 1024

    # Every call reopens the data from the beginning, so the line counter
    # has to restart as well.
    self.linecount = 0

    if self.restype == "URL":
        self.tempfile = self.resfile = tempfile.NamedTemporaryFile(mode="w+b")
        resource = urllib2.urlopen(self.res)
        try:
            data = resource.read(chunk_size)
            while data:
                self.resfile.write(data)
                data = resource.read(chunk_size)
        finally:
            # Release the connection even when the download fails midway.
            resource.close()
        # BZ2File below reopens the temp file by name, so the bytes must
        # be on disk, not only in this handle's buffer.
        self.resfile.flush()
        self.resfile.seek(0)
        if self.compression == "GZ":
            self.resfile = FastGzip(fileobj=self.resfile)
        elif self.compression == "BZ2":
            self.resfile = bz2.BZ2File(self.resfile.name, mode='rb')
    else:
        if self.compression == "GZ":
            self.resfile = FastGzip(self.res)
        elif self.compression == "BZ2":
            self.resfile = bz2.BZ2File(self.res, mode='rb')
        else:
            self.resfile = open(self.res)
    self.nextobj = self.resfile.__iter__()
    return self
class Resource:
    """Line iterator over a local file or remote URL, optionally compressed.

    The resource is classified at construction time:

    * ``restype`` is ``"URL"`` when ``res`` starts with ``http:``, ``https:``
      or ``ftp:`` (case-insensitive), otherwise ``"FILE"``.
    * ``compression`` is ``"GZ"`` / ``"BZ2"`` when ``res`` ends with
      ``.gz`` / ``.bz2`` (case-insensitive), otherwise ``"RAW"``.

    While iterating, every fourth line (the 1st, 5th, 9th, ...) is prefixed
    with ``"@" + individual + "@"``.
    NOTE(review): this looks like tagging the header line of 4-line FASTQ
    records with the individual's name -- confirm against the callers.
    """

    def __init__(self, individual, res):
        """Validate that ``res`` is reachable and classify it.

        Args:
            individual: string inserted into the ``@...@`` prefix.
            res: URL or local file path of the resource.

        Raises:
            Exception: ``"Resource not found: ..."`` when a URL cannot be
                opened/read, ``"File not found: ..."`` when a local path is
                not an existing file.
        """
        self.individual = individual
        self.res = res
        self.linecount = 0
        if re.match("^(http|ftp|https):", res, re.I):
            # Probe the URL by reading a few bytes. Only ordinary errors are
            # translated; KeyboardInterrupt/SystemExit propagate untouched.
            try:
                probe = urllib2.urlopen(res)
                try:
                    probe.read(100)
                finally:
                    probe.close()
            except Exception:
                raise Exception("Resource not found: {0}".format(res))
            self.restype = "URL"
        else:
            if not os.path.isfile(res):
                raise Exception("File not found: {0}".format(res))
            self.restype = "FILE"

        lowered = res.lower()
        if lowered.endswith(".gz"):
            self.compression = "GZ"
        elif lowered.endswith(".bz2"):
            self.compression = "BZ2"
        else:
            self.compression = "RAW"

    def __iter__(self):
        """Open the resource from its beginning and return ``self``.

        A URL is first downloaded in chunks into a named temporary file
        (kept on ``self.tempfile``); a local file is opened in place. The
        resulting handle is stored on ``self.resfile`` and its iterator on
        ``self.nextobj``. ``self.linecount`` restarts at 0.
        """
        chunk_size = 1024 * 1024

        # Each call rereads the data from the start, so the counter that
        # decides which lines get the prefix must restart too.
        self.linecount = 0

        if self.restype == "URL":
            self.tempfile = self.resfile = tempfile.NamedTemporaryFile(mode="w+b")
            resource = urllib2.urlopen(self.res)
            try:
                data = resource.read(chunk_size)
                while data:
                    self.resfile.write(data)
                    data = resource.read(chunk_size)
            finally:
                # Release the connection even when the download fails midway.
                resource.close()
            # BZ2File below reopens the temp file by name, so the bytes
            # must be on disk, not only in this handle's buffer.
            self.resfile.flush()
            self.resfile.seek(0)
            if self.compression == "GZ":
                self.resfile = FastGzip(fileobj=self.resfile)
            elif self.compression == "BZ2":
                self.resfile = bz2.BZ2File(self.resfile.name, mode='rb')
        else:
            if self.compression == "GZ":
                self.resfile = FastGzip(self.res)
            elif self.compression == "BZ2":
                self.resfile = bz2.BZ2File(self.res, mode='rb')
            else:
                self.resfile = open(self.res)
        self.nextobj = self.resfile.__iter__()
        return self

    def next(self):
        """Return the next line, prefixing every fourth one.

        Lines number 0, 4, 8, ... (counted from the start of the current
        iteration) are returned as ``"@" + individual + "@" + line``; all
        other lines are returned unchanged.

        Raises:
            StopIteration: when the underlying file is exhausted.
        """
        # The builtin next() drives both Python 2 (.next) and Python 3
        # (.__next__) iterators.
        line = next(self.nextobj)
        if not self.linecount % 4:
            line = "@" + self.individual + "@" + line
        self.linecount += 1
        return line

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def close(self):
        """Close the handles opened by ``__iter__``.

        For a compressed URL both the decompressing reader and the backing
        temporary file are closed. Calling this before iteration has started
        does nothing.
        """
        if getattr(self, "resfile", None) is None:
            return
        if self.restype == "URL":
            # For "RAW" URLs resfile *is* the temp file; close it once.
            if self.compression != "RAW":
                self.resfile.close()
            self.tempfile.close()
        else:
            self.resfile.close()