import os
import sys
#NOTE: this excerpt also relies on project helpers (send_email, fill_template,
#fill_demultiplex_stats, get_location_dictionary_from_config, ...) and base
#classes (GenericProcess, GenericPipeline) that are imported elsewhere in the
#original module; hypothetical reference sketches of a few other helpers
#(disk_usage, list_sample_dirs, check_fastq_output, StorageDevice) appear below.

def __is_complete__(self,*args,**kwargs):
     """
     Checks for the complete file of the zcat process and handles notifications (if any).
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     elif not os.path.isfile(self.complete_file):
         return False
     #If the process is complete, check that the file sizes are adequate.  If not, send an email.
     size1 = int(disk_usage(self.r1_path))
     size2 = int(disk_usage(self.r2_path))
     size = min(size1,size2)
     #Send an email if the size of the fastq is smaller than the expected size.
     #if size < int(configs['pipeline'].get('Storage','expected_fastq_size')):
         #template_subject = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_subject'))
         #template_body = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_body'))
         #dictionary = {}
         #for k,v in self.__dict__.iteritems():
         #    dictionary.update({k:str(v)})
         #dictionary.update({'size':size})
         #subject = fill_template(template_subject,dictionary)
         #body = fill_template(template_body, dictionary)
         #send_email(subject,body)
     return True
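#disk_usage is not defined in this excerpt.  The sketch below is one possible
#implementation, for reference only: it assumes the original returns the size
#in kilobytes as a string (consistent with the int() casts at the call sites
#and the "200000 is less than 200MB" comparison in __finish__ below), e.g. by
#wrapping "du -sk".
import subprocess

def disk_usage(path):
    """
    Hypothetical sketch: return the size of path in kilobytes (as a string),
    as reported by du -sk.
    """
    output = subprocess.check_output(["du","-sk",path])
    return output.split()[0]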
def storage_currently_used_by_pipeline(config,mockdb,pipeline):
    """
    Determines the amount of storage currently used by the pipeline's output
    directory.
    """
    if pipeline.bcbio_key is not None:
        bcbio = mockdb['Bcbio'].__get__(config,pipeline.bcbio_key)
        return disk_usage(bcbio.output_dir)
    elif pipeline.zcat_key is not None:
        zcat = mockdb['Zcat'].__get__(config,pipeline.zcat_key)
        return disk_usage(zcat.output_dir)
    return 0
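#A minimal usage sketch (this helper is hypothetical, not part of the original
#module): summing the storage used across several pipelines, assuming each
#pipeline record exposes bcbio_key/zcat_key as handled above.
def total_storage_used_by_pipelines(config,mockdb,pipelines):
    """Hypothetical sketch: total disk usage across a set of pipelines."""
    return sum(int(storage_currently_used_by_pipeline(config,mockdb,pipeline)) for pipeline in pipelines)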
def __finish__(self,*args,**kwargs):
     """
     Finishes the bcltofastq pipeline.  This is separated out because multiple
     directories are consolidated into a single email and so the step can be
     isolated for specific pipelines.
     """
     problem_dirs = []
     sample_dirs = list_sample_dirs(self.output_dir.split(":"))
     for sample in sample_dirs:
         for sample_dir in sample_dirs[sample]:
             if int(disk_usage(sample_dir)) < 200000: #i.e. under ~200MB, assuming disk_usage reports kilobytes
                 problem_dirs.append(sample_dir)
     if len(problem_dirs) > 0:
         message = "The following directories are less than 200MB:\n"
         for problem_dir in problem_dirs:
             message += "\t" + problem_dir + "\n"
         message += "Please check.\n"
         #send_email("Small sample directory",message,recipients='[email protected],[email protected]')  
     GenericPipeline.__finish__(self,*args,**kwargs)
     return 1
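#list_sample_dirs is not defined in this excerpt.  From its use in __finish__
#above, it takes a list of output directories (self.output_dir is
#":"-separated) and returns a dictionary mapping each sample name to a list of
#its directories.  A minimal sketch, assuming CASAVA-style "Sample_<name>"
#subdirectories (that naming convention is an assumption):
import glob

def list_sample_dirs(output_dirs):
    """Hypothetical sketch: {sample_name: [sample_dir,...]} across output dirs."""
    sample_dirs = {}
    for output_dir in output_dirs:
        for path in glob.glob(os.path.join(output_dir,"Sample_*")):
            if os.path.isdir(path):
                sample = os.path.basename(path).replace("Sample_","",1)
                sample_dirs.setdefault(sample,[]).append(path)
    return sample_dirs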
def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Due to the inclusion of sub-processes (the bcl-to-fastq pipeline, illuminate, and launched pipelines),
     this function contains the logic to check that all of these processes
     have completed successfully.
     """
     if configs["system"].get("Logging","debug") == "True":
         print "Checking to see if seq run is complete (and advancing post-seq run pipeline)"
     if not os.path.isfile(self.complete_file):
         if configs["system"].get("Logging","debug") == "True":
             print "    Missing complete file " + self.complete_file
         return False
     if not hasattr(self,"interop_archived") or self.interop_archived is False:
         output_name = os.path.basename(self.output_dir)
         if not self.__archive_sequencing_run_data__(configs,self.output_dir,os.path.join(configs["system"].get('Common_directories','hiseq_run_log'),output_name)):
             if configs["system"].get("Logging","debug") == "True":
                 print "    Sequencing run data not archived (InterOp and so forth)."
             return False
     if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None or not hasattr(self,"illuminate_key") or self.illuminate_key is None:
         if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None:
             self.__start_bcltofastq_pipeline__(configs,mockdb)
             if configs["system"].get("Logging","debug") == "True":
                 print "    Starting bcltofastq pipeline."
         if not hasattr(self,"illuminate_key") or self.illuminate_key is None:
             self.__launch_illuminate__(configs,mockdb)
             if configs["system"].get("Logging","debug") == "True":
                 print "    Starting illuminate."
         return False
     illuminate = mockdb['Illuminate'].__get__(configs['system'],self.illuminate_key)
     if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") == "True":
             print "    Illuminate not done"
         return False
     bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(configs['system'],self.bcltofastq_pipeline_key)
     if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") == "True":
             print "    bcltofastq not done"
         return False
     if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None:
         self.__launch_archive_fastq__(configs,mockdb)
         if configs["system"].get("Logging","debug") == "True":
             print "    Launching archive"
         return False
     archive = mockdb['GenericCopy'].__get__(configs['system'],self.generic_copy_key)
     if archive.__is_complete__(*args,**kwargs):
         if int(disk_usage(self.fastq_archive)) <= 30000000:
             if not hasattr(self,'fastq_archive_reported') or self.fastq_archive_reported is None:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n"
                 message += "\nPlease check.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem.",message,recipients=recipients)  
                 self.fastq_archive_reported = True
             return False
         fastq_check = check_fastq_output(self.fastq_archive)
         if fastq_check["md5"] == [] and fastq_check["fastqc"] == [] and fastq_check["index"] is True and fastq_check["sample_sheet"] is True:
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Just informing you of the completion of the flowcell.\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
         else:              
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n"
                 if not fastq_check["sample_sheet"] is True:
                     message += "Sample sheet missing from " + self.archive_fastq + ".\n"
                 else:
                     if not fastq_check["index"]:
                         message += "Index counts not generated.\n"
                     if len(fastq_check["fastqc"]) != 0:
                         message += "The following directories do not have fastqc results:"
                         message += "\n\t".join(fastq_check["fastqc"]) + "\n"
                     if len(fastq_check["md5"]) != 0:
                         message += "The following directories do not have md5 checksums:"
                     message += "\n\t".join(fastq_check["md5"]) + "\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
             return False
             
         if not hasattr(self,"generic_clean_key") or self.generic_clean_key is None:
             if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n"
                 message += "\nContinuing.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem resolved.",message,recipients=recipients)  
             self.__launch_clean__(configs,mockdb)
         self.__link_to_web_portal__(configs['system'])
         if configs["system"].get("Logging","debug") == "True":
             print "  Filling stats"
         flowcell = mockdb['Flowcell'].__get__(configs['system'],self.flowcell_key)
         machine = mockdb['HiSeqMachine'].__get__(configs['system'],self.machine_key)
         fill_demultiplex_stats(configs['system'],mockdb,self.output_dir,flowcell,machine)
     else:
         if configs["system"].get("Logging","debug") == "True":
             print "    Fastq archive not complete"
         return False
     clean = mockdb['GenericClean'].__get__(configs['system'],self.generic_clean_key)
     if clean.__is_complete__(*args,**kwargs):
         self.__finish__(*args,**kwargs)
         return True
     return False
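#check_fastq_output is not defined in this excerpt.  From its use in
#__is_complete__ above, it returns a dict with two lists of problem
#directories ("md5", "fastqc") and two booleans ("index", "sample_sheet").
#A minimal sketch under those assumptions; the file and directory names
#checked here are illustrative, not the original's:
def check_fastq_output(archive_dir):
    """Hypothetical sketch of the report dict consumed by __is_complete__."""
    result = {"md5": [], "fastqc": [], "index": False, "sample_sheet": False}
    result["sample_sheet"] = os.path.isfile(os.path.join(archive_dir,"SampleSheet.csv"))
    result["index"] = os.path.isfile(os.path.join(archive_dir,"index_counts.txt"))
    for sample_dir in glob.glob(os.path.join(archive_dir,"Sample_*")):
        if not glob.glob(os.path.join(sample_dir,"*.md5")):
            result["md5"].append(sample_dir)
        if not glob.glob(os.path.join(sample_dir,"*_fastqc*")):
            result["fastqc"].append(sample_dir)
    return result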
#NOTE: the def line for the following function is missing from this excerpt;
#the name and signature here are reconstructed (hypothetical) from the body,
#which picks the location with the most currently available storage and
#reserves the needed amount on that device.
def choose_location_with_most_available_storage(configs,storage_devices):
    """
    Returns the location with the most currently available storage and
    reserves the needed amount of storage on that device.
    """
    best_location = None
    largest_available = None
    needed_storage = int(configs['pipeline'].get('Storage','needed'))
    for location in configs['system'].get('Location_options','list').split(','):
        storage_device = storage_devices[location]
        current_available = storage_device.available - storage_device.waiting
        if best_location is None:
            best_location = location
            largest_available = current_available
            continue
        if current_available > largest_available:
            best_location = location
            largest_available = current_available
    storage_devices[best_location].waiting += needed_storage
    return best_location
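#A usage sketch tying the storage helpers together (the function name is as
#reconstructed above; the configs layout is assumed from the calls in this
#excerpt):
#    storage_devices = initiate_storage_devices(configs['pipeline'])
#    best = choose_location_with_most_available_storage(configs,storage_devices)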

def initiate_storage_devices(config):
    """
    Load the storage device objects into memory and initializes
    import attributes.
    """
    location_dirs = get_location_dictionary_from_config(config)
    storage_devices = {}
    for name, directory in location_dirs.iteritems():
        storage_devices.update({name:StorageDevice(directory=directory,name=name,limit=config.get('Storage','limit'))})
    return storage_devices
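#StorageDevice is not defined in this excerpt.  The selection function above
#relies on an "available" amount and a "waiting" counter of storage reserved
#for queued work.  A minimal sketch under those assumptions (computing
#"available" as the limit minus current disk usage is a guess):
class StorageDevice(object):
    """Hypothetical sketch of the storage-device record used above."""
    def __init__(self,directory,name,limit):
        self.directory = directory
        self.name = name
        self.limit = int(limit)  #total storage allotted to the pipeline here
        self.waiting = 0         #storage reserved by processes not yet written

    @property
    def available(self):
        #Space still usable under the configured limit for this device.
        return self.limit - int(disk_usage(self.directory))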



if __name__ == '__main__':
    print disk_usage(sys.argv[1])