def main(command_line_args=None): """ :param command_line_args: """ VersionDependencies.python_check() if not command_line_args: command_line_args = sys.argv parser = argparse.ArgumentParser( description="A package to process Synthetic Lethal Data.\n {0} v{1}". format(__package__, __version__), formatter_class=RawTextHelpFormatter) parser.add_argument('--options_file', action='store', dest='options_file', required=True, help='File containing program parameters.') # Convert universal variables intended as boolean from string to boolean. args, options_parser = string_to_boolean(Tool_Box.options_file(parser)) # Check file names and paths for errors error_checking(args) log = Tool_Box.Logger(args) Tool_Box.log_environment_info(log, args, command_line_args) start_time = time.time() module_name = "Synthetic_Lethal" log.info( "{0} v{1}; Module: Synthetic Lethal Analysis v{2} Beginning".format( __package__, __version__, Synthetic_Lethal.__version__)) synthetic_lethal = Synthetic_Lethal.SyntheticLethal(log, args) if args.TargetSearch: synthetic_lethal.fastq_analysis() elif args.Statistics: synthetic_lethal.statistics() else: log.error('No module selected to run.') warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else '' elapsed_time = int(time.time() - start_time) log.info( "****Völundr {0} complete ({1} seconds, {2} Mb peak memory).****\n{3}". format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))
def main(): """ """ VersionDependencies.python_check() parser = argparse.ArgumentParser( description="A package to process Synthetic Lethal Data.\n {0} v{1}". format(__package__, __version__), formatter_class=RawTextHelpFormatter) parser.add_argument('--options_file', action='store', dest='options_file', required=True, help='File containing program parameters.') # Convert strings to int, float, boolean, check file names and paths for errors args, log = error_checking(parser) start_time = time.time() # Initialize program synthetic_lethal = Synthetic_Lethal.SyntheticLethal(log, args) if args.TargetSearch: module_name = "Target Search" log.info("{} v{}; Module: {} v{} Beginning".format( __package__, __version__, module_name, Synthetic_Lethal.__version__)) synthetic_lethal.fastq_analysis() elif args.Statistics: module_name = "Statistical Analysis" log.info("{} v{}; Module: {} v{} Beginning".format( __package__, __version__, module_name, Synthetic_Lethal.__version__)) synthetic_lethal.statistics() else: module_name = "No module selected" log.error('No module selected to run.') warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else '' elapsed_time = int(time.time() - start_time) log.info( "****Völundr {0} complete ({1} seconds, {2} Mb peak memory).****\n{3}". format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))
def main(command_line_args=None):
    """
    Preprocess and split paired-end FASTQ files.
    :param command_line_args: Optional list of command line arguments; defaults to sys.argv.
    """
    VersionDependencies.python_check()

    if not command_line_args:
        command_line_args = sys.argv

    run_start = datetime.datetime.today().strftime("%a %b %d %H:%M:%S %Y")
    parser = argparse.ArgumentParser(description="A little ditty to manipulate FASTQ files.\n {0} v{1}"
                                     .format(__package__, __version__),
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--options_file', action='store', dest='options_file', required=True,
                        help='File containing program parameters.')

    options_parser = Tool_Box.options_file(parser)
    args = options_parser.parse_args()
    # args, options_parser = string_to_boolean(args, options_parser)

    options_parser.set_defaults(Trim5=0)
    options_parser.set_defaults(Trim3=0)
    options_parser.set_defaults(Minimum_Length=100)
    options_parser.set_defaults(N_Limit=100)
    options_parser.set_defaults(HaloPLEX=False)
    options_parser.set_defaults(ThruPLEX=False)
    options_parser.set_defaults(FASTQ_PreProcess=True)
    args = options_parser.parse_args()

    # Check the options file for errors.
    error_checking(args)

    log = Tool_Box.Logger(args)
    Tool_Box.log_environment_info(log, args, command_line_args)
    start_time = time.time()
    module_name = ""

    # Initialize a generator to read each FASTQ file.
    fastq1 = FASTQ_Tools.FASTQ_Reader(args.FASTQ1, log)
    fastq2 = FASTQ_Tools.FASTQ_Reader(args.FASTQ2, log)
    index1 = FASTQ_Tools.FASTQ_Reader(args.Index1, log)
    index2 = FASTQ_Tools.FASTQ_Reader(args.Index2, log)

    splitter_data = FASTQ_Tools.FastqSplitter(args, log, fastq1, fastq2, index1, index2, paired_end=True)
    new_fastq1, new_fastq2 = splitter_data.file_writer()

    warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else ''
    elapsed_time = int(time.time() - start_time)
    log.info("****FASTQ Preprocessing {0} complete ({1} seconds, {2} Mb peak memory).****\n{3}"
             .format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))
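
# FASTQ_Tools.FASTQ_Reader above is the package's own reader. For readers unfamiliar with the
# file format, the sketch below shows the generator pattern the comment names: FASTQ records are
# four lines (name, sequence, spacer, quality). This is an illustration only, not the package's
# implementation, and it omits the gzip handling and logging a real reader would need.
def _fastq_records_sketch(file_path):
    """Yield (name, seq, spacer, qual) tuples from an uncompressed FASTQ file."""
    with open(file_path) as fastq_file:
        while True:
            record = [fastq_file.readline().rstrip("\n") for _ in range(4)]
            if not record[0]:
                return
            yield tuple(record)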
def main(command_line_args=None): """ :param command_line_args: """ VersionDependencies.python_check() if not command_line_args: command_line_args = sys.argv parser = argparse.ArgumentParser( description="A package to process Synthetic Lethal Data.\n {0} v{1}". format(__package__, __version__), formatter_class=RawTextHelpFormatter) parser.add_argument('--options_file', action='store', dest='options_file', required=True, help='File containing program parameters.') options_parser = Tool_Box.options_file(parser) args = options_parser.parse_args() # If we are doing statistical analysis the user will not input an Index_Mismatch value if not getattr(args, "Index_Mismatch", False): options_parser.add_argument("--Index_Mismatch", dest="Index_Mismatch", default=0) options_parser.add_argument("--Analyze_Unknowns", dest="Analyze_Unknowns", default="False") args = options_parser.parse_args() log = Tool_Box.Logger(args) Tool_Box.log_environment_info(log, args, command_line_args) start_time = time.time() module_name = "Synthetic_Lethal" log.info( "{0} v{1}; Module: Synthetic Lethal Analysis v{2} Beginning".format( __package__, __version__, Synthetic_Lethal.__version__)) # Convert universal variables intended as boolean from string to boolean. # ToDo: Should be a cleaner method to do this. if args.Target_Search == "True": options_parser.set_defaults(Target_Search=True) if args.RevComp == "True": options_parser.set_defaults(RevComp=True) else: options_parser.set_defaults(RevComp=False) if args.Delete_Demultiplexed_FASTQ == "True": options_parser.set_defaults(Delete_Demultiplexed_FASTQ=True) else: options_parser.set_defaults(Delete_Demultiplexed_FASTQ=False) if args.compress == "True": options_parser.set_defaults(compress=True) else: options_parser.set_defaults(compress=False) else: options_parser.set_defaults(Target_Search=False) if args.Statistics == "True": options_parser.set_defaults(Statistics=True) else: options_parser.set_defaults(Statistics=False) if args.Analyze_Unknowns == "True": options_parser.set_defaults(Analyze_Unknowns=True) else: options_parser.set_defaults(Analyze_Unknowns=False) args = options_parser.parse_args() synthetic_lethal = Synthetic_Lethal.SyntheticLethal(log, args) # Add some parameters to our options parser object. args = options_parser.parse_args() if args.Target_Search: synthetic_lethal.fastq_analysis() elif args.Statistics: synthetic_lethal.statistics() else: log.error('No module selected to run.') warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else '' elapsed_time = int(time.time() - start_time) log.info( "****Volundr {0} complete ({1} seconds, {2} Mb peak memory).****\n{3}". format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))
def main(command_line_args=None): """ Let's get this party started. :param command_line_args: """ start_time = time.time() VersionDependencies.python_check() if not command_line_args: command_line_args = sys.argv run_start = datetime.datetime.today().strftime("%H:%M:%S %Y %a %b %d") parser = argparse.ArgumentParser( description= "A package to map genomic repair scars at defined loci.\n {} v{}". format(__package__, __version__), formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('--options_file', action='store', dest='options_file', required=True, help='File containing program parameters.') # Check options file for errors and return object. args = error_checking(string_to_boolean(parser)) log = Tool_Box.Logger(args) Tool_Box.log_environment_info(log, args, command_line_args) module_name = "" log.info("{} v{}".format(__package__, __version__)) if args.IndelProcessing: file_list = [] if args.Platform == "Illumina" or args.Platform == "Ramsden" or args.Platform == "TruSeq": log.info("Sending FASTQ files to FASTQ preprocessor.") if args.PEAR: file_list = pear_consensus(args, log) if not file_list: log.error("PEAR failed. Check logs.") raise SystemExit(1) fastq_consensus = file_list[0] fq1 = FASTQ_Tools.FASTQ_Reader(fastq_consensus, log) fq2 = None else: fq2 = FASTQ_Tools.FASTQ_Reader(args.FASTQ2, log) fq1 = FASTQ_Tools.FASTQ_Reader(args.FASTQ1, log) sample_manifest = Tool_Box.FileParser.indices( log, args.SampleManifest) indel_processing = \ Indel_Processing.DataProcessing(log, args, run_start, __version__, Target_Mapper.TargetMapper(log, args, sample_manifest), fq1, fq2) indel_processing.main_loop() # Compress or delete PEAR files. if args.PEAR and file_list: if args.DeleteConsensusFASTQ: log.info("Deleting PEAR FASTQ Files.") Tool_Box.delete(file_list) else: log.info( "Compressing {} FASTQ Files Generated by PEAR.".format( len(file_list))) p = pathos.multiprocessing.Pool(int(args.Spawn)) p.starmap(Tool_Box.compress_files, zip(file_list, itertools.repeat(log))) else: log.error( "Only 'Illumina', 'TruSeq' or 'Ramsden' --Platform methods currently allowed." ) raise SystemExit(1) elif not args.IndelProcessing: # Run frequency file Combine module run_start = datetime.datetime.today().strftime("%a %b %d %H:%M:%S %Y") log.info("Process Replicates.") data_dict = collections.defaultdict(list) file_list = [ f for f in glob.glob("{}*ScarMapper_Frequency.txt".format( args.DataFiles, )) ] file_count = len(file_list) page_header = "# ScarMapper File Merge v{}\n# Run: {}\n# Sample Name: {}\n" \ .format(__version__, run_start, args.SampleName) line_num = 0 index_file = list(csv.reader(open(file_list[0]), delimiter='\t')) for line in index_file: if not line: break elif line_num > 3: page_header += "{}\n".format(line[0]) line_num += 1 page_header += "\n\n" for file_name in file_list: freq_file_data = Tool_Box.FileParser.indices(log, file_name) for row in freq_file_data: key = "{}|{}|{}|{}".format(row[3], row[4], row[6], row[8]) row_data = row[2:] if key in data_dict: data_dict[key][0].append(float(row[1])) else: data_dict[key] = [[float(row[1])], row_data] # Process Data and Write Combined Frequency results file plot_data_dict = collections.defaultdict(list) label_dict = collections.defaultdict(float) output_data_dict = collections.defaultdict(list) marker_list = [] for key, row_list in data_dict.items(): # Force pattern to be in at least half of the files. 
            if len(row_list[0]) / file_count >= 0.5:
                row_string = "\t".join(row_list[1])
                freq = gmean(row_list[0])
                sem = stats.sem(row_list[0])
                freq_results_outstring = "{}\t{}\t{}\n".format(freq, sem, row_string)

                output_key = freq
                # Freq is a 17-digit float, so it is very unlikely to be duplicated. If it is,
                # this increments it by a small number and then checks uniqueness again.
                if output_key in output_data_dict:
                    output_key = output_key + 1e-16
                    if output_key in output_data_dict:
                        output_key = output_key + 1e-16

                scar_type = row_list[1][0]
                label_dict[scar_type] += freq

                # Gather up our data for plotting.
                lft_del = int(row_list[1][1])
                rt_del = int(row_list[1][2])
                mh_size = int(row_list[1][5])
                ins_size = int(row_list[1][7])

                output_data_dict[output_key] = \
                    [(freq, lft_del, rt_del, mh_size, ins_size, scar_type), freq_results_outstring]

        freq_results_outstring = \
            "{}# Frequency\tSEM\tScar Type\tLeft Deletions\tRight Deletions\tDeletion Size\tMicrohomology\t" \
            "Microhomology Size\tInsertion\tInsertion Size\tLeft Template\tRight Template\tConsensus Left Junction\t" \
            "Consensus Right Junction\tTarget Left Junction\tTarget Right Junction\tConsensus\tTarget Region\n" \
            .format(page_header)

        # Now draw a pretty graph of the data if we are not dealing with a negative control.
        for k in natsort.natsorted(output_data_dict, reverse=True):
            data_list = output_data_dict[k]
            freq_results_outstring += data_list[1]
            freq = data_list[0][0]
            lft_del = data_list[0][1]
            rt_del = data_list[0][2]
            mh_size = data_list[0][3]
            ins_size = data_list[0][4]
            scar_type = data_list[0][5]

            # Plotting all scar patterns is messy. This provides a cutoff.
            if freq < 0.00025:
                continue

            y_value = freq * 0.5
            lft_ins_width = freq
            rt_ins_width = freq

            # This is gathered up to find the largest value, which is used to set the x-axis limits.
            marker_list.extend([lft_del + (mh_size * 0.5), rt_del + (mh_size * 0.5), ins_size])

            # Deletion size includes half the size of any microhomology present.
            lft_del_plot_value = (lft_del + (mh_size * 0.5)) * -1
            rt_del_plot_value = rt_del + (mh_size * 0.5)

            # Insertions are centered on 0, so we need to take half the value for each side.
            lft_ins_plot_value = (ins_size * 0.5) * -1
            rt_ins_plot_value = ins_size * 0.5

            # Scale the width of bars for insertions inside of deletions.
            if lft_del + (mh_size * 0.5) != 0:
                lft_ins_width = freq * 0.5
            if rt_del + (mh_size * 0.5) != 0:
                rt_ins_width = freq * 0.5

            if scar_type not in plot_data_dict:
                plot_data_dict[scar_type] = \
                    [[freq], [lft_del_plot_value], [rt_del_plot_value], [lft_ins_plot_value],
                     [rt_ins_plot_value], [lft_ins_width], [rt_ins_width], [y_value]]
            else:
                # Get some previous plot data.
                count = len(plot_data_dict[scar_type][0])
                previous_freq = plot_data_dict[scar_type][0][count - 1]
                previous_y = plot_data_dict[scar_type][7][count - 1]

                plot_data_dict[scar_type][0].append(freq)
                plot_data_dict[scar_type][1].append(lft_del_plot_value)
                plot_data_dict[scar_type][2].append(rt_del_plot_value)
                plot_data_dict[scar_type][3].append(lft_ins_plot_value)
                plot_data_dict[scar_type][4].append(rt_ins_plot_value)
                plot_data_dict[scar_type][5].append(lft_ins_width)
                plot_data_dict[scar_type][6].append(rt_ins_width)

                # Use the previous plot data to find the y-value of the current bar.
                plot_data_dict[scar_type][7] \
                    .append(previous_y + 0.002 + (0.5 * previous_freq) + y_value)

        plot_data_dict['Marker'] = [(max(marker_list)) * -1, max(marker_list)]

        # sample_name = "{}.{}".format(args.Job_Name, args.SampleName)
        ScarMapperPlot.scarmapperplot(args, datafile=None, sample_name=args.SampleName,
                                      plot_data_dict=plot_data_dict, label_dict=label_dict)

        freq_results_file = \
            open("{}{}_ScarMapper_Combined_Frequency.txt".format(args.WorkingFolder, args.SampleName), "w")
        freq_results_file.write(freq_results_outstring)
        freq_results_file.close()

    warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else ''
    elapsed_time = int(time.time() - start_time)
    log.info("****ScarMapper {0} complete ({1} seconds, {2} Mb peak memory).****\n{3}"
             .format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))

    # All done, so we need to quit; otherwise Python will not release the log file on virtual Linux.
    exit(0)
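
# The Combine module above summarizes each scar pattern across replicate files with a geometric
# mean and the standard error of the mean (gmean and stats.sem from scipy). A self-contained
# sketch of that calculation, using made-up replicate frequencies:
def _combine_replicates_sketch():
    """Return (geometric mean, SEM) for a set of hypothetical replicate frequencies."""
    from scipy.stats import gmean, sem
    replicate_freqs = [0.0021, 0.0018, 0.0025]  # hypothetical per-replicate frequencies
    return gmean(replicate_freqs), sem(replicate_freqs)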