def produce_analysis_piper(ngi_config, project_id):
    """Create a mock piper_ngi analysis tree for *project_id*.

    Builds the numbered piper output folders under
    <base_root>/<sthlm_root>/<top_dir>/ANALYSIS/<project_id>/piper_ngi and
    touches placeholder BAM/VCF files for every sample directory found in
    the corresponding DATA folder.
    """
    analysis_cfg = ngi_config["analysis"]
    project_root = os.path.join(analysis_cfg["base_root"],
                                analysis_cfg["sthlm_root"],
                                analysis_cfg["top_dir"])
    analysis_dir = os.path.join(project_root, "ANALYSIS", project_id)
    data_dir = os.path.join(project_root, "DATA", project_id)
    piper_ngi_dir = os.path.join(analysis_dir, "piper_ngi")
    fs.create_folder(piper_ngi_dir)
    numbered_dirs = ("01_raw_alignments",
                     "02_preliminary_alignment_qc",
                     "03_genotype_concordance",
                     "04_merged_aligments",
                     "05_processed_alignments",
                     "06_final_alignment_qc",
                     "07_variant_calls",
                     "08_misc")
    for subdir in numbered_dirs:
        current_dir = os.path.join(piper_ngi_dir, subdir)
        fs.create_folder(current_dir)
        # placeholder per-sample result files in the two output folders
        if subdir == "05_processed_alignments":
            for sample_id in os.listdir(data_dir):
                fs.touch(os.path.join(
                    current_dir, "{}.clean.dedup.bam".format(sample_id)))
        if subdir == "07_variant_calls":
            for sample_id in os.listdir(data_dir):
                fs.touch(os.path.join(
                    current_dir,
                    "{}.clean.dedup.recal.bam.raw.indel.vcf.gz".format(sample_id)))
    for extra in ("sbatch", "setup_xml_files", "logs"):
        current_dir = os.path.join(piper_ngi_dir, extra)
        fs.create_folder(current_dir)
    # the version report goes into the last created folder ("logs")
    create_version_report(current_dir)
def create_uppmax_env(ngi_config):
    """Create the folder structure of an uppmax-like environment.

    :param dict ngi_config: parsed NGI configuration; must contain the
        'analysis' and 'environment' sections
    :returns: dict with the resolved 'base_root', 'sthlm_root', 'top_dir'
        and 'flowcell_inbox' paths
    :raises SystemExit: if required configuration keys are missing or
        base_root does not exist
    """
    paths = {}
    if 'analysis' not in ngi_config:
        sys.exit('ERROR: analysis must be a field of NGI_CONFIG.')
    try:
        base_root = ngi_config['analysis']['base_root']
        paths['base_root'] = base_root
        sthlm_root = ngi_config['analysis']['sthlm_root']
        paths['sthlm_root'] = sthlm_root
        top_dir = ngi_config['analysis']['top_dir']
        paths['top_dir'] = top_dir
    except KeyError as e:
        raise SystemExit('Config file is missing the key {}, make sure it have all required information'.format(str(e)))
    if 'environment' not in ngi_config:
        sys.exit('ERROR: environment must be a field of NGI_CONFIG.')
    try:
        # Get base root
        flowcell_inboxes = ngi_config['environment']['flowcell_inbox']
        flowcell_inbox = flowcell_inboxes[0]  # I assume there is only one
        paths['flowcell_inbox'] = flowcell_inbox
    # BUGFIX: a missing key raises KeyError (not ValueError, which could
    # never fire here); an empty inbox list raises IndexError -- catch both
    except (KeyError, IndexError) as e:
        sys.exit('key error, flowcell_inbox not found in "{}": {}'.format(ngi_config, e))
    # Now I need to create the folders for this
    if not os.path.exists(base_root):
        sys.exit('base_root needs to exists: {}'.format(base_root))
    fs.create_folder(flowcell_inbox)
    if sthlm_root is None:
        path_to_analysis = os.path.join(base_root, top_dir)
    else:
        path_to_analysis = os.path.join(base_root, sthlm_root, top_dir)
    fs.create_folder(path_to_analysis)
    return paths
def create_uppmax_env(ngi_config):
    """Create the folder structure of an uppmax-like environment.

    :param dict ngi_config: parsed NGI configuration; must contain the
        "analysis" and "environment" sections
    :returns: dict with the resolved "base_root", "sthlm_root", "top_dir"
        and "flowcell_inbox" paths
    :raises SystemExit: if required configuration keys are missing or
        base_root does not exist
    """
    paths = {}
    if "analysis" not in ngi_config:
        sys.exit("ERROR: analysis must be a field of NGI_CONFIG.")
    try:
        base_root = ngi_config["analysis"]["base_root"]
        paths["base_root"] = base_root
        sthlm_root = ngi_config["analysis"]["sthlm_root"]
        paths["sthlm_root"] = sthlm_root
        top_dir = ngi_config["analysis"]["top_dir"]
        paths["top_dir"] = top_dir
    except KeyError as e:
        raise SystemExit("Config file is missing the key {}, make sure it have all required information".format(str(e)))
    if "environment" not in ngi_config:
        sys.exit("ERROR: environment must be a field of NGI_CONFIG.")
    try:
        # get base root
        flowcell_inboxes = ngi_config["environment"]["flowcell_inbox"]
        flowcell_inbox = flowcell_inboxes[0]  # I assume there is only one
        paths["flowcell_inbox"] = flowcell_inbox
    # BUGFIX: a missing key raises KeyError (not ValueError, which could
    # never fire here); an empty inbox list raises IndexError -- catch both
    except (KeyError, IndexError) as e:
        sys.exit('key error, flowcell_inbox not found in "{}": {}'.format(ngi_config, e))
    # now I need to create the folders for this
    if not os.path.exists(base_root):
        sys.exit('base_root needs to exists: {}'.format(base_root))
    fs.create_folder(flowcell_inbox)
    if sthlm_root is None:
        path_to_analysis = os.path.join(base_root, top_dir)
    else:
        path_to_analysis = os.path.join(base_root, sthlm_root, top_dir)
    fs.create_folder(path_to_analysis)
    return paths
def do_delivery(self):
    """ Deliver the staged delivery folder using rsync

    :returns: True if delivery was successful, False if unsuccessful
    :raises DelivererRsyncError: if an exception occurred during
        transfer
    """
    # rsync option map; None-valued keys are flags without an argument
    rsync_opts = {
        '--files-from': [self.staging_filelist()],
        '--copy-links': None,
        '--recursive': None,
        '--perms': None,
        '--chmod': 'ug+rwX,o-rwx',
        '--verbose': None,
        '--exclude': ["*rsync.out", "*rsync.err"]
    }
    agent = transfer.RsyncAgent(
        self.expand_path(self.stagingpath),
        dest_path=self.expand_path(self.deliverypath),
        digestfile=self.delivered_digestfile(),
        remote_host=getattr(self, 'remote_host', None),
        remote_user=getattr(self, 'remote_user', None),
        log=logger,
        opts=rsync_opts)
    # make sure the folder that will receive the transfer log exists
    create_folder(os.path.dirname(self.transfer_log()))
    try:
        return agent.transfer(transfer_log=self.transfer_log())
    except transfer.TransferError as e:
        raise DelivererRsyncError(e)
def stage_delivery(self):
    """ Stage a delivery by symlinking source paths to destination paths
        according to the returned tuples from the gather_files function.
        Checksums will be written to a digest file in the staging path.
        Failure to stage individual files will be logged as warnings but will
        not terminate the staging.

        :raises DelivererError: if an unexpected error occurred
    """
    digestpath = self.staging_digestfile()
    filelistpath = self.staging_filelist()
    create_folder(os.path.dirname(digestpath))
    try:
        with open(digestpath, 'w') as dh, open(filelistpath, 'w') as fh:
            # a single agent instance is reused; src/dest are rebound per file
            agent = transfer.SymlinkAgent(None, None, relative=True)
            for src, dst, digest in self.gather_files():
                agent.src_path = src
                agent.dest_path = dst
                try:
                    agent.transfer()
                except (transfer.TransferError, transfer.SymlinkError) as e:
                    # best-effort staging: log the failure and continue;
                    # the file is still recorded in the file list below
                    logger.warning("failed to stage file '{}' when "
                                   "delivering {} - reason: {}".format(
                                       src, str(self), e))
                fpath = os.path.relpath(dst, self.expand_path(self.stagingpath))
                fh.write("{}\n".format(fpath))
                if digest is not None:
                    dh.write("{} {}\n".format(digest, fpath))
            # finally, include the digestfile in the list of files to deliver
            fh.write("{}\n".format(os.path.basename(digestpath)))
    except (IOError, fs.FileNotFoundException, fs.PatternNotMatchedException) as e:
        raise DelivererError(
            "failed to stage delivery - reason: {}".format(e))
    return True
def deliver_run_folder(self):
    """Hard stages run folder and initiates delivery.

    Asks the user to confirm that the sensitivity flag is correct, then
    copies the flowcell tarball and its md5 file into the hard staging
    area.

    :returns: False if the user aborts; otherwise None after the copy
        attempt (copy failures are logged, not raised)
    """
    # stage the data
    dst = self.expand_path(self.stagingpathhard)
    path_to_data = self.expand_path(self.datapath)
    runfolder_archive = os.path.join(path_to_data, self.fcid + ".tar.gz")
    runfolder_md5file = runfolder_archive + ".md5"
    question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
    if not self.sensitive:
        question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
    if proceed_or_not(question):
        logger.info(
            "Delivering {} to GRUS with mover. Project marked as SENSITIVE={}"
            .format(str(self), self.sensitive))
    else:
        logger.error(
            "{} delivery has been aborted. Sensitive level was WRONG.".
            format(str(self)))
        return False
    status = True
    create_folder(dst)
    try:
        shutil.copy(runfolder_archive, dst)
        shutil.copy(runfolder_md5file, dst)
        logger.info("Copying files {} and {} to {}".format(
            runfolder_archive, runfolder_md5file, dst))
    # BUGFIX: "except IOError, e" is Python 2-only syntax (SyntaxError on
    # Python 3); use the "as" form like the rest of the module
    except IOError as e:
        logger.error(
            "Unable to copy files to {}. Please check that the files exist and that the filenames match the flowcell ID."
            .format(dst))
def deliver_project(self):
    """ Deliver all samples in a project to grus

        :returns: True if all samples were delivered successfully, False if
            any sample was not properly delivered or ready to be delivered
    """
    # moved this part from constructor, as we can create an object without
    # running the delivery (e.g. to check_delivery_status)
    # check if the project directory already exists, if so abort
    hard_stagepath = self.expand_path(self.stagingpathhard)
    if os.path.exists(hard_stagepath):
        logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
            hard_stagepath, self.projectid))
        raise DelivererInterruptedError("Hard Staged Folder already present")
    else:
        # otherwise lock the delivery by creating the folder
        create_folder(hard_stagepath)
    logger.info("Delivering {} to GRUS".format(str(self)))
    if self.get_delivery_status() == 'DELIVERED' \
            and not self.force:
        logger.info("{} has already been delivered".format(str(self)))
        return True
    status = True
    try:
        # connect to charon, return list of sample objects
        samples_to_deliver = self.get_staged_samples_from_charon()
    # BUGFIX: "except Exception, e" is Python 2-only syntax (SyntaxError on
    # Python 3); use the "as" form like the rest of the module
    except Exception as e:
        logger.error("Cannot get samples from Charon. Error says: {}".format(str(e)))
        logger.exception(e)
        exit(1)
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Create a mock Illumina flowcell directory under *incoming_dir*.

    :param str incoming_dir: directory in which the flowcell folder is created
    :param str run_name: flowcell folder name,
        e.g. 160217_ST-E00201_0063_AHJHNYCCXX
    :param list samplesheet: list of dicts, one per samplesheet data row
    :param fastq_1: optional path to an R1 fastq to symlink instead of
        touching an empty placeholder
    :param fastq_2: optional path to an R2 fastq (used together with fastq_1)
    """
    # Create something like 160217_ST-E00201_0063_AHJHNYCCXX
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # This FC exists, skip it
        return
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, 'RTAComplete.txt'))
    # Create folder Demultiplexing
    fs.create_folder(os.path.join(path_to_fc, 'Demultiplexing'))
    # Create folder Demultiplexing/Reports
    fs.create_folder(os.path.join(path_to_fc, 'Demultiplexing', 'Reports'))
    # Create folder Demultiplexing/Stats
    fs.create_folder(os.path.join(path_to_fc, 'Demultiplexing', 'Stats'))
    # Memorise SampleSheet stats
    header = []
    for key in samplesheet[0]:
        header.append(key)
    counter = 1
    current_lane = ''
    for line in samplesheet:
        project_name = line.get('Sample_Project', line.get('Project', ''))
        lane = line['Lane']
        if current_lane == '':
            current_lane = lane
        elif current_lane != lane:
            # new lane: the S<N> sample counter restarts at 1
            counter = 1
            current_lane = lane
        sample_id = line.get('SampleID', line.get('Sample_ID', ''))
        sample_name = line.get('SampleName', line.get('Sample_Name', ''))
        # Create dir structure
        fs.create_folder(os.path.join(path_to_fc, 'Demultiplexing', project_name, sample_id))
        # Now create the data
        fastq_1_dest = '{}_S{}_L00{}_R1_001.fastq.gz'.format(sample_name, counter, lane)
        fastq_2_dest = '{}_S{}_L00{}_R2_001.fastq.gz'.format(sample_name, counter, lane)
        counter += 1
        if fastq_1 is None:
            # no source fastqs supplied: create empty placeholder files
            fs.touch(os.path.join(path_to_fc, 'Demultiplexing', project_name, sample_id, fastq_1_dest))
            fs.touch(os.path.join(path_to_fc, 'Demultiplexing', project_name, sample_id, fastq_2_dest))
        else:
            fs.do_symlink(fastq_1, os.path.join(path_to_fc, 'Demultiplexing', project_name, sample_id, fastq_1_dest))
            fs.do_symlink(fastq_2, os.path.join(path_to_fc, 'Demultiplexing', project_name, sample_id, fastq_2_dest))
    # write a minimal SampleSheet.csv reflecting the rows we just created
    with open(os.path.join(path_to_fc, 'SampleSheet.csv'), 'w') as Samplesheet_file:
        Samplesheet_file.write(u'[Header]\n')
        Samplesheet_file.write(u'Date,2016-03-29\n')
        Samplesheet_file.write(u'Investigator Name,Christian Natanaelsson\n')
        Samplesheet_file.write(u'[Data]\n')
        for key in header:
            Samplesheet_file.write(u'{},'.format(key))
        Samplesheet_file.write(u'\n')
        for line in samplesheet:
            for key in header:
                Samplesheet_file.write(u'{},'.format(line[key]))
            Samplesheet_file.write(u'\n')
def deliver_project(self):
    """ Deliver all samples in a project to grus

        :returns: True if all samples were delivered successfully, False if
            any sample was not properly delivered or ready to be delivered
    """
    # first thing check that we are using mover 1.0.0
    if not check_mover_version():
        logger.error("Not delivering becouse wrong mover version detected")
        return False
    # moved this part from constructor, as we can create an object without
    # running the delivery (e.g. to check_delivery_status)
    # check if the project directory already exists, if so abort
    soft_stagepath = self.expand_path(self.stagingpath)
    hard_stagepath = self.expand_path(self.stagingpathhard)
    if os.path.exists(hard_stagepath):
        logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
            hard_stagepath, self.projectid))
        raise DelivererInterruptedError("Hard Staged Folder already present")
    # check that this project is not under delivery with mover already;
    # in this case stop delivery
    if self.get_delivery_status() == 'DELIVERED' \
            and not self.force:
        logger.info("{} has already been delivered. This project will not be delivered again this time.".format(str(self)))
        return True
    elif self.get_delivery_status() == 'IN_PROGRESS':
        logger.error("Project {} is already under delivery. No multiple mover deliveries are allowed".format(
            self.projectid))
        raise DelivererInterruptedError("Proejct already under delivery with Mover")
    elif self.get_delivery_status() == 'PARTIAL':
        logger.warning("{} has already been partially delivered. Please confirm you want to proceed.".format(str(self)))
        if proceed_or_not("Do you want to proceed (yes/no): "):
            logger.info("{} has already been partially delivered. User confirmed to proceed.".format(str(self)))
        else:
            # BUGFIX: this message was split across a physical line break in
            # the source, which is a SyntaxError; rejoined into one literal
            logger.error("{} has already been partially delivered. User decided to not proceed.".format(str(self)))
            return False
    # now check if the sensitive flag has been set in the correct way
    question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
    if not self.sensitive:
        question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
    if proceed_or_not(question):
        logger.info("Delivering {} to GRUS with mover. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
    else:
        logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
        return False
    # now start with the real work
    status = True
    # otherwise lock the delivery by creating the folder
    create_folder(hard_stagepath)
    # now find the PI mail which is needed to create the delivery project
    if self.pi_email is None:
        try:
            self.pi_email = self._get_pi_email()
            logger.info("email for PI for project {} found: {}".format(self.projectid, self.pi_email))
        # BUGFIX: "except Exception, e" is Python 2-only syntax
        except Exception as e:
            logger.error("Cannot fetch pi_email from StatusDB. Error says: {}".format(str(e)))
            # print the traceback, not only error message -> isn't it something more useful?
            logger.exception(e)
            status = False
    return status
def deliver_project(self):
    """ Deliver all samples in a project to grus

        :returns: True if all samples were delivered successfully, False if
            any sample was not properly delivered or ready to be delivered
    """
    # first thing check that we are using mover 1.0.0
    if not check_mover_version():
        logger.error("Not delivering becouse wrong mover version detected")
        return False
    # moved this part from constructor, as we can create an object without
    # running the delivery (e.g. to check_delivery_status)
    # check if the project directory already exists, if so abort
    hard_stagepath = self.expand_path(self.stagingpathhard)
    if os.path.exists(hard_stagepath):
        logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
            hard_stagepath, self.projectid))
        raise DelivererInterruptedError("Hard Staged Folder already present")
    # check that this project is not under delivery with mover already;
    # in this case stop delivery
    if self.get_delivery_status() == 'DELIVERED' \
            and not self.force:
        logger.info("{} has already been delivered. This project will not be delivered again this time.".format(str(self)))
        return True
    elif self.get_delivery_status() == 'IN_PROGRESS':
        logger.error("Project {} is already under delivery. No multiple mover deliveries are allowed".format(
            self.projectid))
        raise DelivererInterruptedError("Proejct already under delivery with Mover")
    elif self.get_delivery_status() == 'PARTIAL':
        logger.warning("{} has already been partially delivered. Please confirm you want to proceed.".format(str(self)))
        if proceed_or_not("Do you want to proceed (yes/no): "):
            logger.info("{} has already been partially delivered. User confirmed to proceed.".format(str(self)))
        else:
            logger.error("{} has already been partially delivered. User decided to not proceed.".format(str(self)))
            return False
    # now check if the sensitive flag has been set in the correct way
    # BUGFIX: this question literal was split across a physical line break in
    # the source, which is a SyntaxError; rejoined into one literal
    question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
    if not self.sensitive:
        question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
    if proceed_or_not(question):
        logger.info("Delivering {} to GRUS with mover. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
    else:
        logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
        return False
    # now start with the real work
    status = True
    # otherwise lock the delivery by creating the folder
    create_folder(hard_stagepath)
    # now find the PI mail which is needed to create the delivery project
    if self.pi_email is None:
        try:
            self.pi_email = self._get_pi_email()
            logger.info("email for PI for project {} found: {}".format(self.projectid, self.pi_email))
        # BUGFIX: "except Exception, e" is Python 2-only syntax
        except Exception as e:
            logger.error("Cannot fetch pi_email from StatusDB. Error says: {}".format(str(e)))
            # print the traceback, not only error message -> isn't it something more useful?
            logger.exception(e)
            status = False
    return status
def merge_demux_results(fc_dir):
    """Merge results of demultiplexing from different demultiplexing folders

    :param str fc_dir: Path to the flowcell directory.
    """
    # NOTE(review): indentation reconstructed from a whitespace-mangled
    # source -- confirm the intended nesting of the statements below the
    # config loop against the original file.
    for option in CONFIG['analysis']['bcl2fastq']['options']:
        if isinstance(option, dict) and option.get('output-dir'):
            _demux_folder = option.get('output-dir')
    # all per-run demultiplexing folders, e.g. <demux>_1, <demux>_2, ...
    unaligned_dirs = glob.glob(os.path.join(fc_dir, '{}_*'.format(_demux_folder)))
    #If it is a MiSeq run, the fc_id will be everything after the -
    if '-' in os.path.basename(fc_dir):
        fc_id = os.path.basename(fc_dir).split('_')[-1]
    #If it is a HiSeq run, we only want the flowcell id (without A/B)
    else:
        fc_id = os.path.basename(fc_dir).split('_')[-1][1:]
    basecall_dir = 'Basecall_Stats_{fc_id}'.format(fc_id=fc_id)
    merged_dir = os.path.join(fc_dir, _demux_folder)
    merged_basecall_dir = os.path.join(merged_dir, basecall_dir)
    #Create the final Unaligned folder and copy there all configuration files
    filesystem.create_folder(os.path.join(merged_dir, basecall_dir))
    shutil.copy(os.path.join(unaligned_dirs[0], basecall_dir, 'Flowcell_demux_summary.xml'),
                merged_basecall_dir)
    shutil.copy(os.path.join(unaligned_dirs[0], basecall_dir, 'Demultiplex_Stats.htm'),
                merged_basecall_dir)
    #The file Undemultiplexed_stats.metrics may not always be there.
    u_s_file = os.path.exists(os.path.join(unaligned_dirs[0], basecall_dir,
                                           'Undemultiplexed_stats.metrics'))
    if u_s_file:
        shutil.copy(os.path.join(unaligned_dirs[0], basecall_dir, 'Undemultiplexed_stats.metrics'),
                    merged_basecall_dir)
        #And it is possible that it is empty, in which case we have to add
        #the header
        u_s_file_final = os.path.join(merged_basecall_dir, 'Undemultiplexed_stats.metrics')
        with open(u_s_file_final, 'r') as f:
            content = f.readlines()
        header = ['lane', 'sequence', 'count', 'index_name']
        if content and content[0].split() != header:
            with open(u_s_file_final, 'w') as final:
                final.writelines('\t'.join(header) + '\n')
    if len(unaligned_dirs) > 1:
        # fold every additional demux folder into the merged one
        for u in unaligned_dirs[1:]:
            #Merge Flowcell_demux_summary.xml
            m_flowcell_demux = merge_flowcell_demux_summary(merged_dir, u, fc_id)
            m_flowcell_demux.write(os.path.join(merged_dir, basecall_dir, 'Flowcell_demux_summary.xml'))
            #Merge Demultiplex_Stats.htm
            m_demultiplex_stats = merge_demultiplex_stats(merged_dir, u, fc_id)
            with open(os.path.join(merged_dir, basecall_dir, 'Demultiplex_Stats.htm'), 'w+') as f:
                f.writelines(re.sub(r"Unaligned_[0-9]{1,2}bp", 'Unaligned',
                                    m_demultiplex_stats.renderContents()))
            #Merge Undemultiplexed_stats.metrics
            if u_s_file:
                merge_undemultiplexed_stats_metrics(merged_dir, u, fc_id)
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Create a mock Illumina flowcell directory under *incoming_dir*.

    :param str incoming_dir: directory in which the flowcell folder is created
    :param str run_name: flowcell folder name,
        e.g. 160217_ST-E00201_0063_AHJHNYCCXX
    :param list samplesheet: list of dicts, one per samplesheet data row
    :param fastq_1: optional path to an R1 fastq to symlink instead of
        touching an empty placeholder
    :param fastq_2: optional path to an R2 fastq (used together with fastq_1)
    """
    # create something like 160217_ST-E00201_0063_AHJHNYCCXX
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # this FC exists, skip it
        return
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, "RTAComplete.txt"))
    # create folder Demultiplexing
    fs.create_folder(os.path.join(path_to_fc, "Demultiplexing"))
    # create folder Demultiplexing/Reports
    fs.create_folder(os.path.join(path_to_fc, "Demultiplexing", "Reports"))
    # create folder Demultiplexing/Stats
    fs.create_folder(os.path.join(path_to_fc, "Demultiplexing", "Stats"))
    #memorise SampleSheet stats
    header = []
    for key in samplesheet[0]:
        header.append(key)
    counter = 1
    current_lane = ""
    for line in samplesheet:
        project_name = line.get("Sample_Project", line.get("Project", ""))
        lane = line["Lane"]
        if current_lane == "":
            current_lane = lane
        elif current_lane != lane:
            # new lane: the S<N> sample counter restarts at 1
            counter = 1
            current_lane = lane
        sample_id = line.get("SampleID", line.get("Sample_ID", ""))
        sample_name = line.get("SampleName", line.get("Sample_Name", ""))
        #create dir structure
        fs.create_folder(os.path.join(path_to_fc, "Demultiplexing", project_name, sample_id))
        #now create the data
        fastq_1_dest = "{}_S{}_L00{}_R1_001.fastq.gz".format(sample_name, counter, lane)
        fastq_2_dest = "{}_S{}_L00{}_R2_001.fastq.gz".format(sample_name, counter, lane)
        counter += 1
        if fastq_1 is None:
            # no source fastqs supplied: create empty placeholder files
            fs.touch(os.path.join(path_to_fc, "Demultiplexing", project_name, sample_id, fastq_1_dest))
            fs.touch(os.path.join(path_to_fc, "Demultiplexing", project_name, sample_id, fastq_2_dest))
        else:
            fs.do_symlink(fastq_1, os.path.join(path_to_fc, "Demultiplexing", project_name, sample_id, fastq_1_dest))
            fs.do_symlink(fastq_2, os.path.join(path_to_fc, "Demultiplexing", project_name, sample_id, fastq_2_dest))
    # write a minimal SampleSheet.csv reflecting the rows we just created
    with open(os.path.join(path_to_fc, "SampleSheet.csv"), "w") as Samplesheet_file:
        Samplesheet_file.write("[Header]\n")
        Samplesheet_file.write("Date,2016-03-29\n")
        Samplesheet_file.write("Investigator Name,Christian Natanaelsson\n")
        Samplesheet_file.write("[Data]\n")
        for key in header:
            Samplesheet_file.write("{},".format(key))
        Samplesheet_file.write("\n")
        for line in samplesheet:
            for key in header:
                Samplesheet_file.write("{},".format(line[key]))
            Samplesheet_file.write("\n")
def acknowledge_delivery(self, tstamp=_timestamp()):
    """Write a delivery acknowledgement file containing *tstamp*.

    The ack file is named after the sample id (or the project id when no
    sample id is set) and placed under the delivery status path. Failures
    are logged as warnings and never raised.

    NOTE: the default tstamp is evaluated once at import time, not per
    call -- callers wanting "now" must pass it explicitly.
    """
    try:
        ack_name = "{}_delivered.ack".format(self.sampleid or self.projectid)
        ackfile = self.expand_path(
            os.path.join(self.deliverystatuspath, ack_name))
        create_folder(os.path.dirname(ackfile))
        with open(ackfile, 'w') as fh:
            fh.write("{}\n".format(tstamp))
    except (AttributeError, IOError) as e:
        logger.warning(
            "could not write delivery acknowledgement, reason: {}".format(e))
def deliver_run_folder(self):
    """ Symlink run folder to stage path, create DDS delivery project and upload data.

    :returns: True if the DDS upload succeeded, False otherwise (or on
        user abort)
    :raises AssertionError: if the DDS delivery project cannot be detected
    """
    # Stage the data
    dst = self.expand_path(self.stagingpath)
    path_to_data = self.expand_path(self.datapath)
    runfolder_archive = os.path.join(path_to_data, self.fcid + ".tar.gz")
    runfolder_md5file = runfolder_archive + ".md5"
    question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
    if not self.sensitive:
        question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
    if proceed_or_not(question):
        logger.info("Delivering {} with DDS. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
    else:
        logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
        return False
    status = True
    create_folder(dst)
    try:
        # BUGFIX: os.symlink takes the full link path as its second
        # argument; passing the existing staging directory itself raises
        # FileExistsError. Join the destination with the file basename.
        os.symlink(runfolder_archive,
                   os.path.join(dst, os.path.basename(runfolder_archive)))
        os.symlink(runfolder_md5file,
                   os.path.join(dst, os.path.basename(runfolder_md5file)))
        logger.info("Symlinking files {} and {} to {}".format(runfolder_archive, runfolder_md5file, dst))
    except OSError as e:  # os.symlink raises OSError (IOError is its alias)
        logger.error("Unable to symlink files to {}. Please check that the files "
                     "exist and that the filenames match the flowcell ID.".format(dst))
    delivery_id = ''
    try:
        delivery_id = self._create_delivery_project()
        logger.info("Delivery project for project {} has been created. "
                    "Delivery ID is {}".format(self.projectid, delivery_id))
    except AssertionError as e:
        logger.exception('Unable to detect DDS delivery project.')
        raise e
    # Upload with DDS
    dds_delivery_status = self.upload_data(delivery_id)
    if dds_delivery_status:
        logger.info("DDS upload for project {} to "
                    "delivery project {} was sucessful".format(self.projectid, delivery_id))
    else:
        logger.error('Something when wrong when uploading {} '
                     'to DDS project {}'.format(self.projectid, delivery_id))
        status = False
    return status
def create_report(self):
    """ Create a sample report and an aggregate report via a system call """
    logprefix = os.path.abspath(
        self.expand_path(
            os.path.join(self.logpath,
                         "{}-{}".format(self.projectid, self.sampleid))))
    try:
        # disable log files when the log folder cannot be created
        if not create_folder(os.path.dirname(logprefix)):
            logprefix = None
    except AttributeError:
        logprefix = None
    use_logs = logprefix is not None
    with chdir(self.expand_path(self.reportpath)):
        # create the ign_sample_report for this sample
        sample_cl = self.report_sample.split(' ') + ["--samples", self.sampleid]
        call_external_command(sample_cl,
                              with_log_files=use_logs,
                              prefix="{}_sample".format(logprefix))
        # estimate the delivery date for this sample to 0.5 days ahead
        samples_extra = {
            self.sampleid: {
                "delivered": "{}(expected)".format(_timestamp(days=0.5))
            }
        }
        aggregate_cl = self.report_aggregate.split(' ') + [
            "--samples_extra", json.dumps(samples_extra)]
        call_external_command(aggregate_cl,
                              with_log_files=use_logs,
                              prefix="{}_aggregate".format(logprefix))
def test_crete_folder3(self):
    """ Ensure that create_folder handles thrown exceptions gracefully """
    target = os.path.join(self.rootdir, "target-non-existing")
    # force os.makedirs to fail so the error-handling path is exercised
    with mock.patch.object(filesystem.os, 'makedirs', side_effect=OSError):
        outcome = filesystem.create_folder(target)
    self.assertFalse(outcome,
                     "A raised exception was not handled properly")
def test_crete_folder3(self):
    """ Ensure that create_folder handles thrown exceptions gracefully """
    # force os.makedirs to fail so the error-handling path is exercised
    with mock.patch.object(filesystem.os, 'makedirs', side_effect=OSError):
        result = filesystem.create_folder(
            os.path.join(self.rootdir, "target-non-existing"))
        self.assertFalse(result,
                         "A raised exception was not handled properly")
def create_report(self):
    """ Create a sample report and an aggregate report via a system call """
    logprefix = os.path.abspath(
        self.expand_path(os.path.join(self.logpath, "{}-{}".format(
            self.projectid, self.sampleid))))
    try:
        # a falsy return means the log folder could not be created:
        # fall back to running the commands without log files
        if not create_folder(os.path.dirname(logprefix)):
            logprefix = None
    except AttributeError:
        logprefix = None
    with chdir(self.expand_path(self.reportpath)):
        # create the ign_sample_report for this sample
        cl = self.report_sample.split(' ')
        cl.extend(["--samples", self.sampleid])
        call_external_command(
            cl,
            with_log_files=(logprefix is not None),
            prefix="{}_sample".format(logprefix))
        # estimate the delivery date for this sample to 0.5 days ahead
        cl = self.report_aggregate.split(' ')
        cl.extend([
            "--samples_extra",
            json.dumps({
                self.sampleid: {
                    "delivered": "{}(expected)".format(
                        _timestamp(days=0.5))}})
        ])
        call_external_command(
            cl,
            with_log_files=(logprefix is not None),
            prefix="{}_aggregate".format(logprefix))
def transfer(self):
    """Create the symlink as specified by this SymlinkAgent instance.

    :returns: True if the symlink was created successfully, False otherwise
    :raises transfer.TransferError: if src_path or dest_path were not valid
    :raises transfer.SymlinkError: if an error occurred when creating the
        symlink
    """
    self.validate_src_path()
    self.validate_dest_path()
    if os.path.exists(self.dest_path):
        # If the existing target is a symlink that points to the
        # source, we're all good
        if self.validate_transfer():
            logger.debug('target exists and points to the correct '
                         'source path: "{}"'.format(self.src_path))
            return True
        # If we are not overwriting, return False
        if not self.overwrite:
            logger.debug('target "{}" exists and will not be '
                         'overwritten'.format(self.dest_path))
            return False
        # If the target is a mount, let's not mess with it
        if os.path.ismount(self.dest_path):
            raise SymlinkError('target exists and is a mount')
        # If the target is a link or a file, we remove it
        if os.path.islink(self.dest_path) or \
                os.path.isfile(self.dest_path):
            logger.debug('removing existing target file "{}"'.format(
                self.dest_path))
            try:
                os.unlink(self.dest_path)
            except OSError as e:
                raise SymlinkError(e)
        # If the target is a directory, we remove it and
        # everything underneath
        elif os.path.isdir(self.dest_path):
            logger.debug('removing existing target folder "{}"'.format(
                self.dest_path))
            try:
                shutil.rmtree(self.dest_path)
            except OSError as e:
                raise SymlinkError(e)
        # If it's something else, let's bail out
        else:
            raise SymlinkError('target exists and will not be overwritten')
    if not create_folder(os.path.dirname(self.dest_path)):
        raise SymlinkError('failed to create target folder hierarchy')
    try:
        # If we should create a relative symlink, determine the relative path
        os.symlink(
            os.path.relpath(self.src_path, os.path.dirname(self.dest_path)) \
            if self.relative else self.src_path,
            self.dest_path)
    except OSError as e:
        raise SymlinkError(e)
    # when self.validate is falsy the post-transfer check is skipped
    return (not self.validate) or self.validate_transfer()
def test_crete_folder1(self):
    """ Ensure that a non-existing folder is created """
    target_folder = os.path.join(self.rootdir, "target-non-existing")
    created = filesystem.create_folder(target_folder)
    self.assertTrue(created,
                    "A non-existing target folder could not be created")
    self.assertTrue(
        os.path.exists(target_folder),
        "A non-existing target folder was not created \
            but method returned True")
def transfer(self):
    """ Create the symlink as specified by this SymlinkAgent instance.

    :returns: True if the symlink was created successfully, False otherwise
    :raises transfer.TransferError: if src_path or dest_path were not valid
    :raises transfer.SymlinkError: if an error occurred when creating the
        symlink
    """
    self.validate_src_path()
    self.validate_dest_path()
    if os.path.exists(self.dest_path):
        # If the existing target is a symlink that points to the
        # source, we're all good
        if self.validate_transfer():
            logger.debug("target exists and points to the correct "
                         "source path: '{}'".format(self.src_path))
            return True
        # If we are not overwriting, return False
        if not self.overwrite:
            logger.debug("target '{}' exists and will not be "
                         "overwritten".format(self.dest_path))
            return False
        # If the target is a mount, let's not mess with it
        if os.path.ismount(self.dest_path):
            raise SymlinkError("target exists and is a mount")
        # If the target is a link or a file, we remove it
        if os.path.islink(self.dest_path) or \
                os.path.isfile(self.dest_path):
            logger.debug("removing existing target file '{}'"
                         .format(self.dest_path))
            try:
                os.unlink(self.dest_path)
            except OSError as e:
                raise SymlinkError(e)
        # If the target is a directory, we remove it and
        # everything underneath
        elif os.path.isdir(self.dest_path):
            logger.debug("removing existing target folder '{}'"
                         .format(self.dest_path))
            try:
                shutil.rmtree(self.dest_path)
            except OSError as e:
                raise SymlinkError(e)
        # If it's something else, let's bail out
        else:
            raise SymlinkError("target exists and will not be overwritten")
    if not create_folder(os.path.dirname(self.dest_path)):
        raise SymlinkError("failed to create target folder hierarchy")
    try:
        # If we should create a relative symlink, determine the relative path
        os.symlink(
            os.path.relpath(self.src_path, os.path.dirname(self.dest_path)) \
            if self.relative else self.src_path,
            self.dest_path)
    except OSError as e:
        raise SymlinkError(e)
    # when self.validate is falsy the post-transfer check is skipped
    return (not self.validate) or self.validate_transfer()
def test_crete_folder_parent_non_existing(self):
    """Ensure that a non-existing parent folder is created."""
    target_folder = os.path.join(self.rootdir,
                                 'parent-non-existing',
                                 'target-non-existing')
    created = filesystem.create_folder(target_folder)
    self.assertTrue(
        created,
        'A non-existing parent and target folder could not be created')
    self.assertTrue(
        os.path.exists(target_folder),
        'A non-existing parent folder was not created \
            but method returned True')
def test_crete_folder1(self):
    """ Ensure that a non-existing folder is created """
    target_folder = os.path.join(self.rootdir, "target-non-existing")
    outcome = filesystem.create_folder(target_folder)
    self.assertTrue(outcome,
                    "A non-existing target folder could not be created")
    self.assertTrue(
        os.path.exists(target_folder),
        "A non-existing target folder was not created \
            but method returned True")
def test_deliver_sample1(self):
    """ transfer a sample using rsync """
    # create some content to transfer
    digestfile = self.deliverer.staging_digestfile()
    filelist = self.deliverer.staging_filelist()
    basedir = os.path.dirname(digestfile)
    create_folder(basedir)
    expected = []
    with open(digestfile, 'w') as dh, open(filelist, 'w') as fh:
        curdir = basedir
        # BUGFIX: xrange was removed in Python 3; use range
        for d in range(4):
            if d > 0:
                curdir = os.path.join(curdir, "folder{}".format(d))
                create_folder(curdir)
            for n in range(5):
                fpath = os.path.join(curdir, "file{}".format(n))
                open(fpath, 'w').close()
                rpath = os.path.relpath(fpath, basedir)
                digest = hashfile(fpath, hasher=self.deliverer.hash_algorithm)
                # only the first 3 files per folder are listed for transfer
                if n < 3:
                    expected.append(rpath)
                    fh.write("{}\n".format(rpath))
                dh.write("{} {}\n".format(digest, rpath))
        # the digestfile itself is always part of the delivery
        rpath = os.path.basename(digestfile)
        expected.append(rpath)
        fh.write("{}\n".format(rpath))
    # transfer the listed content
    destination = self.deliverer.expand_path(self.deliverer.deliverypath)
    create_folder(os.path.dirname(destination))
    self.assertTrue(self.deliverer.do_delivery(), "failed to deliver sample")
    # list the trasferred files relative to the destination
    observed = [os.path.relpath(os.path.join(d, f), destination)
                for d, _, files in os.walk(destination) for f in files]
    # BUGFIX: assertItemsEqual was removed in Python 3; compare sorted
    # lists like the sibling Python 3 version of this test
    self.assertEqual(sorted(observed), sorted(expected))
def test_deliver_sample1(self): """ transfer a sample using rsync """ # create some content to transfer digestfile = self.deliverer.staging_digestfile() filelist = self.deliverer.staging_filelist() basedir = os.path.dirname(digestfile) create_folder(basedir) expected = [] with open(digestfile, 'w') as dh, open(filelist, 'w') as fh: curdir = basedir for d in range(4): if d > 0: curdir = os.path.join(curdir, "folder{}".format(d)) create_folder(curdir) for n in range(5): fpath = os.path.join(curdir, "file{}".format(n)) open(fpath, 'w').close() rpath = os.path.relpath(fpath, basedir) digest = hashfile(fpath, hasher=self.deliverer.hash_algorithm) if n < 3: expected.append(rpath) fh.write(u"{}\n".format(rpath)) dh.write(u"{} {}\n".format(digest, rpath)) rpath = os.path.basename(digestfile) expected.append(rpath) fh.write(u"{}\n".format(rpath)) # transfer the listed content destination = self.deliverer.expand_path(self.deliverer.deliverypath) create_folder(os.path.dirname(destination)) self.assertTrue(self.deliverer.do_delivery(), "failed to deliver sample") # list the trasferred files relative to the destination observed = [os.path.relpath(os.path.join(d, f), destination) for d, _, files in os.walk(destination) for f in files] self.assertEqual(sorted(observed), sorted(expected))
def produce_analysis_piper(ngi_config, project_id):
    """Create a mock piper_ngi analysis tree for *project_id*.

    Builds the numbered piper result folders under
    <base_root>/<sthlm_root>/<top_dir>/ANALYSIS/<project_id>/piper_ngi,
    touches one placeholder bam/vcf per sample found in the matching
    DATA folder, and finishes with a version report in logs/.
    """
    analysis_cfg = ngi_config['analysis']
    project_root = os.path.join(analysis_cfg['base_root'],
                                analysis_cfg['sthlm_root'],
                                analysis_cfg['top_dir'])
    analysis_dir = os.path.join(project_root, 'ANALYSIS', project_id)
    data_dir = os.path.join(project_root, 'DATA', project_id)
    piper_ngi_dir = os.path.join(analysis_dir, 'piper_ngi')
    fs.create_folder(piper_ngi_dir)
    numbered_dirs = ['01_raw_alignments', '02_preliminary_alignment_qc',
                     '03_genotype_concordance', '04_merged_aligments',
                     '05_processed_alignments', '06_final_alignment_qc',
                     '07_variant_calls', '08_misc']
    for subdir in numbered_dirs:
        current_dir = os.path.join(piper_ngi_dir, subdir)
        fs.create_folder(current_dir)
        # 05 and 07 get one empty result file per sample present in DATA
        if subdir == '05_processed_alignments':
            for sample_id in os.listdir(data_dir):
                bam_name = '{}.clean.dedup.bam'.format(sample_id)
                fs.touch(os.path.join(current_dir, bam_name))
        elif subdir == '07_variant_calls':
            for sample_id in os.listdir(data_dir):
                vcf_name = '{}.clean.dedup.recal.bam.raw.indel.vcf.gz'.format(sample_id)
                fs.touch(os.path.join(current_dir, vcf_name))
    for extra_dir in ('sbatch', 'setup_xml_files', 'logs'):
        current_dir = os.path.join(piper_ngi_dir, extra_dir)
        fs.create_folder(current_dir)
    # current_dir is now the logs folder; the version report lives there
    create_version_report(current_dir)
def create_report(self):
    """Run the external report-aggregation command for this project."""
    logprefix = os.path.abspath(
        self.expand_path(os.path.join(self.logpath, self.projectid)))
    # If the log folder cannot be created (or create_folder raises
    # AttributeError), run the command without log files.
    try:
        folder_ok = create_folder(os.path.dirname(logprefix))
    except AttributeError:
        folder_ok = False
    if not folder_ok:
        logprefix = None
    with chdir(self.expand_path(self.reportpath)):
        command = self.report_aggregate.split(' ')
        call_external_command(command,
                              with_log_files=(logprefix is not None),
                              prefix="{}_aggregate".format(logprefix))
def create_report(self):
    """Aggregate the final report by shelling out to the configured command."""
    log_target = os.path.join(self.logpath, self.projectid)
    logprefix = os.path.abspath(self.expand_path(log_target))
    # No usable log folder -> disable per-command log files below.
    try:
        if not create_folder(os.path.dirname(logprefix)):
            logprefix = None
    except AttributeError:
        logprefix = None
    report_dir = self.expand_path(self.reportpath)
    with chdir(report_dir):
        argv = self.report_aggregate.split(' ')
        use_logs = logprefix is not None
        call_external_command(argv,
                              with_log_files=use_logs,
                              prefix="{}_aggregate".format(logprefix))
def produce_analysis_qc_ngi(ngi_config, project_id):
    """Create a mock qc_ngi analysis tree for *project_id*.

    For every sample folder found under DATA/<project_id>, creates a
    matching qc_ngi/<sample>/{fastqc,fastq_screen} folder pair under
    ANALYSIS/<project_id>.
    """
    analysis_cfg = ngi_config['analysis']
    project_root = os.path.join(analysis_cfg['base_root'],
                                analysis_cfg['sthlm_root'],
                                analysis_cfg['top_dir'])
    analysis_dir = os.path.join(project_root, 'ANALYSIS', project_id)
    data_dir = os.path.join(project_root, 'DATA', project_id)
    qc_ngi_dir = os.path.join(analysis_dir, 'qc_ngi')
    fs.create_folder(qc_ngi_dir)
    for sample_id in os.listdir(data_dir):
        sample_dir_qc = os.path.join(qc_ngi_dir, sample_id)
        fs.create_folder(sample_dir_qc)
        for qc_tool in ('fastqc', 'fastq_screen'):
            fs.create_folder(os.path.join(sample_dir_qc, qc_tool))
def produce_analysis_qc_ngi(ngi_config, project_id):
    """Build the qc_ngi folder skeleton for one project.

    Mirrors each sample under DATA/<project_id> as a qc_ngi sample
    folder containing empty fastqc and fastq_screen subfolders.
    """
    base_parts = (ngi_config["analysis"]["base_root"],
                  ngi_config["analysis"]["sthlm_root"],
                  ngi_config["analysis"]["top_dir"])
    analysis_dir = os.path.join(*base_parts, "ANALYSIS", project_id)
    data_dir = os.path.join(*base_parts, "DATA", project_id)
    qc_ngi_dir = os.path.join(analysis_dir, "qc_ngi")
    fs.create_folder(qc_ngi_dir)
    for sample_id in os.listdir(data_dir):
        per_sample_dir = os.path.join(qc_ngi_dir, sample_id)
        fs.create_folder(per_sample_dir)
        fs.create_folder(os.path.join(per_sample_dir, "fastqc"))
        fs.create_folder(os.path.join(per_sample_dir, "fastq_screen"))
raise AssertionError('No staged samples found in Charon') # collect other files (not samples) if any to include in the hard staging misc_to_deliver = [ itm for itm in os.listdir(soft_stagepath) if os.path.splitext(itm)[0] not in samples_to_deliver ] question = "\nProject stagepath: {}\nSamples: {}\nMiscellaneous: {}\n\nProceed with delivery ? " question = question.format(soft_stagepath, ", ".join(samples_to_deliver), ", ".join(misc_to_deliver)) if proceed_or_not(question): logger.info("Proceeding with delivery of {}".format(str(self))) #lock the delivery by creating the folder create_folder(hard_stagepath) else: logger.error( "Aborting delivery for {}, remove unwanted files and try again" .format(str(self))) return False hard_staged_samples = [] for sample_id in samples_to_deliver: try: sample_deliverer = GrusSampleDeliverer(self.projectid, sample_id) sample_deliverer.deliver_sample() except Exception, e: logger.error( 'Sample {} has not been hard staged. Error says: {}'.
def test_crete_folder2(self):
    """Verify create_folder reports success for an already-existing folder."""
    # self.rootdir exists already; create_folder must still return True.
    detected = filesystem.create_folder(self.rootdir)
    self.assertTrue(detected,
                    "A pre-existing target folder was not detected")
def test_crete_folder2(self):
    """An existing directory should be accepted by create_folder."""
    outcome = filesystem.create_folder(self.rootdir)
    # The fixture root already exists, so this must succeed.
    self.assertTrue(
        outcome,
        "A pre-existing target folder was not detected")