def archiveFilesTask(tempTarFile=None, job=None, DEBUG_MODE=False, DESCRIPTION="",
                     TAGS=[], DRY=False, EXTENDEDCIFS=False, crawlid=None):
    """Archive one batch ("job") of files: tar them to tempTarFile, record them
    in the database, upload the tarball to Glacier, and update the crawl's
    upload statistics. Typically run as a Celery task (it uses Celery's task logger)."""
    from archiver.archiveFiles import makeTar, uploadToGlacier, addPerms
    from celery.utils.log import get_task_logger
    logger = get_task_logger(__name__)

    # pull configuration from Django settings into module-level globals
    global NUM_PROCS, TEMP_DIR, ACCESS_KEY, SECRET_ACCESS_KEY, GLACIER_VAULT, \
        NUMFILES, ARCHIVEMB, GLACIER_REALM, USECELERY
    NUM_PROCS = settings.NUM_PROCS
    TEMP_DIR = settings.TEMP_DIR
    ACCESS_KEY = settings.ACCESS_KEY
    SECRET_ACCESS_KEY = settings.SECRET_ACCESS_KEY
    GLACIER_VAULT = settings.GLACIER_VAULT
    NUMFILES = settings.NUMFILES
    ARCHIVEMB = settings.ARCHIVEMB
    GLACIER_REALM = settings.GLACIER_REALM
    USECELERY = settings.USECELERY

    logger.info("Got job %s-files,%s,%s" % (len(job), DESCRIPTION, crawlid))
    try:
        # create the database archive record
        c = Archives().archive_create(short_description=DESCRIPTION, tags=TAGS,
                                      vault=GLACIER_VAULT)
        if not DRY:
            sid = transaction.savepoint()
        if DEBUG_MODE:
            logger.debug("Created archive in DB")
        # add files to temp archive on disk
        try:
            # add each to tar archive
            makeTar(job, tempTarFile, DRY)
            if not DRY:
                transaction.savepoint_commit(sid)
            if DEBUG_MODE:
                logger.info("Number of files in job: %s -- File %s"
                            % (len(job), tempTarFile))
            # add each to DB
            bulk = []
            permissions = []
            filelength = len(job)
            total_bytesize = 0
            if tempTarFile:
                for jobf in job:
                    jobfile = jobf['rfile']
                    statinfo = os.stat(jobfile)
                    bytesize = statinfo.st_size
                    atime = statinfo.st_atime
                    mtime = statinfo.st_mtime
                    ctime = statinfo.st_ctime
                    afo = ArchiveFiles()
                    afo_id = afo.get_next_data_id()
                    f = ArchiveFiles(
                        id=afo_id,
                        archive=c,
                        startdate=datetime.now(),
                        bytesize=bytesize,
                        filepath=jobfile,
                        fileadate=datetime.fromtimestamp(atime),
                        filecdate=datetime.fromtimestamp(ctime),
                        filemdate=datetime.fromtimestamp(mtime),
                    )
                    total_bytesize = total_bytesize + bytesize
                    bulk.append(f)
                    if EXTENDEDCIFS:
                        permissions.append({"perm": jobf['perms'], "fileobj": f})
            if not DRY:
                ArchiveFiles.objects.bulk_create(bulk)
                transaction.savepoint()
                if EXTENDEDCIFS:
                    for p in permissions:
                        addPerms(p["perm"], p["fileobj"])
                # upload to glacier
                archive_id = uploadToGlacier(tempTarFile=tempTarFile,
                                             DEBUG_MODE=DEBUG_MODE,
                                             GLACIER_VAULT=GLACIER_VAULT,
                                             SECRET_ACCESS_KEY=SECRET_ACCESS_KEY,
                                             ACCESS_KEY=ACCESS_KEY,
                                             GLACIER_REALM=GLACIER_REALM)
                c.update_archive_id(archive_id)
                c.bytesize = total_bytesize
                c.filecount = filelength
                c.save()
                try:
                    # update crawl statistics with the bytes uploaded so far
                    crawlfil = Crawl.objects.filter(id=crawlid)
                    crawlfil.update(bytesuploaded=(F('bytesuploaded') + total_bytesize))
                    transaction.savepoint()
                    crawl = Crawl.objects.get(id=crawlid)
                    if crawl.totalbytes and crawl.totalbytes > 0:
                        logger.info("Finished job %s: %s percent done total crawl. "
                                    % (DESCRIPTION,
                                       ((crawl.bytesuploaded * 100) / crawl.totalbytes)))
                    else:
                        logger.info("Still crawling %s -- will get an ETA soon"
                                    % DESCRIPTION)
                except Exception as exc:
                    logger.error("Error with updating crawl stats: %s" % (exc))
                transaction.commit()
            else:
                transaction.rollback()
        except Exception as exc:
            # error handler assumed (not shown in the listing): log and roll back
            logger.error("Error creating archive %s" % exc)
            transaction.rollback()
    except Exception as exc:
        # error handler assumed (not shown in the listing): log and roll back
        logger.error("Error creating archive %s" % exc)
        transaction.rollback()
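# A minimal usage sketch (hypothetical values, not from the original source).
# The task can be called directly as below; if it is registered with Celery
# elsewhere, it would instead be queued with archiveFilesTask.delay(...) using
# the same keyword arguments.
#
#   archiveFilesTask(
#       tempTarFile="/tmp/archive-0001.tar",                  # assumed scratch path
#       job=[{"rfile": "/data/report.pdf", "perms": None}],   # files must exist (os.stat)
#       DESCRIPTION="nightly crawl batch",
#       DRY=True,                                             # dry run: no commit, no upload
#       crawlid=42,                                           # assumed existing Crawl id
#   )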
def archiveFiles(tempTarFile=None, dry=False):
    """Worker loop: pull batches of files off the shared queue, tar each batch
    to tempTarFile, record the files in the database, and upload the tarball
    to Glacier."""
    global queue
    global logger
    global GLACIER_VAULT
    global EXTENDEDCIFS

    if queue.empty():
        print "the Queue is empty!"
    while queue.qsize() > 0:
        print "the Queue has stuff: %s" % queue.qsize()
        try:
            job = queue.get()
            print "Got job %s-files" % len(job)
            try:
                # create database archive record
                c = Archives().archive_create(short_description=DESCRIPTION,
                                              tags=TAGS, vault=GLACIER_VAULT)
                if not dry:
                    sid = transaction.savepoint()
                if DEBUG_MODE:
                    logger.debug("Created archive in DB")
                # add files to temp archive on disk
                try:
                    # add each to tar archive
                    makeTar(job, tempTarFile, dry)
                    if not dry:
                        transaction.savepoint_commit(sid)
                    if DEBUG_MODE:
                        logger.debug("Number of files in job: %s -- File %s"
                                     % (len(job), tempTarFile))
                    # add each to DB
                    bulk = []
                    permissions = []
                    filelength = len(job)
                    total_bytesize = 0
                    if tempTarFile:
                        for jobf in job:
                            jobfile = jobf['rfile']
                            statinfo = os.stat(jobfile)
                            bytesize = statinfo.st_size
                            atime = statinfo.st_atime
                            mtime = statinfo.st_mtime
                            ctime = statinfo.st_ctime
                            f = ArchiveFiles(
                                archive=c,
                                startdate=datetime.now(),
                                bytesize=bytesize,
                                filepath=jobfile,
                                fileadate=datetime.fromtimestamp(atime),
                                filecdate=datetime.fromtimestamp(ctime),
                                filemdate=datetime.fromtimestamp(mtime),
                            )
                            total_bytesize = total_bytesize + bytesize
                            bulk.append(f)
                            if EXTENDEDCIFS:
                                permissions.append({"perm": jobf['perms'],
                                                    "fileobj": f})
                    if dry:
                        if DEBUG_MODE:
                            logger.debug("done task -- dry run -- %s " % tempTarFile)
                        transaction.rollback()
                        queue.task_done()
                    else:
                        ArchiveFiles.objects.bulk_create(bulk)
                        if EXTENDEDCIFS:
                            for p in permissions:
                                addPerms(p["perm"], p["fileobj"])
                        # upload to glacier
                        archive_id = uploadToGlacier(tempTarFile=tempTarFile,
                                                     DEBUG_MODE=DEBUG_MODE,
                                                     GLACIER_VAULT=GLACIER_VAULT,
                                                     SECRET_ACCESS_KEY=SECRET_ACCESS_KEY,
                                                     ACCESS_KEY=ACCESS_KEY,
                                                     GLACIER_REALM=GLACIER_REALM)
                        c.update_archive_id(archive_id)
                        c.bytesize = total_bytesize
                        c.filecount = filelength
                        c.save()
                        queue.task_done()
                        transaction.commit()
                        if DEBUG_MODE:
                            logger.debug("done task: %s " % tempTarFile)
                except Exception as exc:
                    logger.error('Error creating archive %s' % exc)
                    transaction.rollback()
                    queue.task_done()
                # get archive_id
            except Exception as exc:
                logger.error('Error creating archive2 %s' % exc)
                transaction.rollback()
                queue.task_done()
        except Exception as exc:
            logger.error("error on queue: %s" % exc)
            try:
                transaction.rollback()
                queue.task_done()
            except:
                pass
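# A minimal driver sketch (hypothetical, not part of the original source).
# It assumes `queue` is a module-level Queue.Queue filled elsewhere with lists
# of {"rfile": ..., "perms": ...} dicts, one list per archive batch.
#
#   queue.put([{"rfile": "/data/report.pdf", "perms": None}])
#   archiveFiles(tempTarFile="/tmp/archive-0001.tar", dry=True)  # dry run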