Example #1
	# Assumes module-level imports not shown on this page: os, gc,
	# datetime/timedelta, a configured logger, and the Celery task af.
	def addFile(self, rfile, statinfo):
		# Accumulate files into self.jobarray; once the batch hits the
		# file-count or size limit, dispatch it as a Celery job and start over.
		perms = []
		gc.collect()
		from archiver.archiveFiles import id_generator
		mega_byte_size = self.arraysize / (1024 * 1024)  # current batch size in MB
		if self.extendedcifs:
			perms = self.buildPerms(perms, rfile)
			
		# Keep filling the current batch until either limit is reached.
		if len(self.jobarray) < self.numfiles and mega_byte_size < self.archivemb:
			self.jobarray.append({"rfile": rfile, "perms": perms})
			self.arraysize = self.arraysize + statinfo.st_size
		else:
			jobcopy = list(self.jobarray)  # a real copy, not an alias of the batch
			if self.usecelery:
				# af is the Celery archive task; id_gen is a unique temp path
				# for this job's payload.
				id_gen = self.temp_dir + "/" + id_generator(size=16)
				af.apply_async(args=[id_gen, jobcopy, self.debug, self.description, self.tags, self.dry, self.extendedcifs, self.crawlid])
				del jobcopy[:]
				gc.collect()
			#else:
			#	self.queue.put(jobcopy)
			self.totaljobsize = self.arraysize + self.totaljobsize
			# Start a fresh batch containing the file that did not fit.
			self.jobarray = []
			self.arraysize = 0
			gc.collect()
			self.jobarray.append({"rfile": rfile, "perms": perms})
			self.arraysize = self.arraysize + statinfo.st_size
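
Both examples import id_generator from archiver.archiveFiles, whose source is not shown on this page. A minimal sketch of a compatible helper, assuming the common random-string recipe (the real implementation may differ):

import random
import string

def id_generator(size=16, chars=string.ascii_uppercase + string.digits):
	# Return a random identifier of the requested length; addFile() combines
	# it with self.temp_dir to build a unique path for each dispatched job.
	return ''.join(random.choice(chars) for _ in range(size))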
Example #2
	def recurseCrawl(self, filepath=filepath):
		# The default value relies on a module-level filepath defined in the
		# original source; the walk batches files through addFile() above.
		global logger
		from archiver.archiveFiles import id_generator
		for (path, dirs, files) in os.walk(filepath):
			for fi in files:
				rfile = os.path.join(path, fi)
				if os.path.islink(rfile):
					continue
				statinfo = os.stat(rfile)
				if self.oldertime > 0 or self.newertime > 0:
					# Named dateatime, but this is the modification time (st_mtime).
					dateatime = datetime.fromtimestamp(statinfo.st_mtime)
					if self.oldertime > 0 and self.newertime > 0:
						# Between: older than oldertime days but newer than newertime days.
						if (dateatime < (datetime.now() - timedelta(days=self.oldertime))) and (dateatime > (datetime.now() - timedelta(days=self.newertime))):
							self.addFile(rfile, statinfo)
							continue
					elif self.oldertime > 0 and self.newertime == 0:
						# Only a lower bound on age: older than oldertime days.
						if dateatime < (datetime.now() - timedelta(days=self.oldertime)):
							self.addFile(rfile, statinfo)
							continue
					elif self.oldertime == 0 and self.newertime > 0:
						# Only an upper bound on age: newer than newertime days.
						if dateatime > (datetime.now() - timedelta(days=self.newertime)):
							self.addFile(rfile, statinfo)
							continue
					else:
						continue
				else:
					# No age filter configured: batch every regular file.
					self.addFile(rfile, statinfo)
				gc.collect()
		# Flush the final, possibly partial, batch after the walk completes.
		jobcopy = list(self.jobarray)
		if self.usecelery:
			if len(jobcopy) > 0:
				id_gen = self.temp_dir + "/" + id_generator(size=16)
				af.apply_async(args=[id_gen, jobcopy, self.debug, self.description, self.tags, self.dry, self.extendedcifs, self.crawlid])
		else:
			pass
			#if len(jobcopy)>0:
			#	self.queue.put(jobcopy)
		self.totaljobsize = self.totaljobsize + self.arraysize
		logger.info("Done crawl %s %s %s bytes" % (filepath, self.crawlid, self.totaljobsize))
		return
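
The four-way age check in the walk above is easy to misread. A standalone sketch of the same windowing rule, using illustrative names that do not appear in the original source:

from datetime import datetime, timedelta

def within_age_window(mtime, older_days=0, newer_days=0):
	# A file qualifies when it is older than older_days and newer than
	# newer_days; a value of 0 disables that bound, and 0/0 means no filter.
	now = datetime.now()
	if older_days > 0 and mtime >= now - timedelta(days=older_days):
		return False  # not yet older than older_days
	if newer_days > 0 and mtime <= now - timedelta(days=newer_days):
		return False  # already older than newer_days
	return True

In recurseCrawl terms, within_age_window(dateatime, self.oldertime, self.newertime) decides whether addFile() is called for a given file.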