def align_samples(self, samples_dir, output_dir, genome_dir, genome_fasta=None, samples=None,
                  annotation_gtf=None, sjdboverhang=None,
                  genomeSAindexNbases=None, genomeChrBinNbits=None, genome_size=None,
                  feature_from_gtf_to_use_as_exon=None, exon_tag_to_use_as_transcript_id=None,
                  exon_tag_to_use_as_gene_id=None, length_of_sequences_flanking_junction=None,
                  junction_tab_file_list=None, three_prime_trim=None, five_prime_trim=None,
                  adapter_seq_for_three_prime_clip=None,
                  max_mismatch_percent_for_adapter_trimming=None,
                  three_prime_trim_after_adapter_clip=None,
                  output_type="BAM", sort_bam=True, max_memory_for_bam_sorting=8000000000,
                  include_unmapped_reads_in_bam=True, output_unmapped_reads=True,
                  two_pass_mode=True, max_intron_length=None):
    """Align every sample found under ``samples_dir`` with STAR and index the result.

    Workflow:
      1. If ``genome_fasta`` is given, build the genome index in ``genome_dir``
         via ``STAR.index``.
      2. For each sample, create ``<output_dir>/<sample>/``, collect the
         forward/reverse read files from ``<samples_dir>/<sample>/`` and run
         ``STAR.align`` with that directory as output.
      3. When the output is a coordinate-sorted BAM, index
         ``Aligned.sortedByCoord.out.bam`` with ``SamtoolsV1.index``.

    :param samples_dir: directory containing one sub-directory per sample.
    :param output_dir: directory for per-sample alignment sub-directories
        (created if necessary).
    :param genome_dir: STAR genome index directory.
    :param genome_fasta: if set, the index is (re)built from this FASTA
        before aligning.
    :param samples: explicit collection of sample names; when empty/None the
        list is obtained from ``self.get_sample_list(samples_dir)``.
    :param annotation_gtf: annotation GTF. Passed to ``STAR.index`` when the
        index is built here, otherwise passed to ``STAR.align``.
    :param sjdboverhang, genomeSAindexNbases, genomeChrBinNbits, genome_size:
        forwarded unchanged to ``STAR.index``.
    :param junction_tab_file_list: forwarded to ``STAR.index`` (as
        ``junction_tab_file``) and to ``STAR.align``.
    :param feature_from_gtf_to_use_as_exon, exon_tag_to_use_as_transcript_id,
        exon_tag_to_use_as_gene_id, length_of_sequences_flanking_junction,
        three_prime_trim, five_prime_trim, adapter_seq_for_three_prime_clip,
        max_mismatch_percent_for_adapter_trimming,
        three_prime_trim_after_adapter_clip, max_memory_for_bam_sorting,
        include_unmapped_reads_in_bam, output_unmapped_reads, two_pass_mode,
        max_intron_length: forwarded unchanged to ``STAR.align``.
    :param output_type: alignment output type forwarded to ``STAR.align``;
        indexing is only attempted for ``"BAM"``.
    :param sort_bam: forwarded to ``STAR.align``; indexing is only attempted
        when it is true.
    :return: None
    """
    if genome_fasta:
        STAR.index(genome_dir, genome_fasta,
                   annotation_gtf=annotation_gtf,
                   junction_tab_file=junction_tab_file_list,
                   sjdboverhang=sjdboverhang,
                   genomeSAindexNbases=genomeSAindexNbases,
                   genomeChrBinNbits=genomeChrBinNbits,
                   genome_size=genome_size)

    sample_list = samples if samples else self.get_sample_list(samples_dir)

    # The annotation was already handed to STAR.index when the index was
    # built above, so it is not passed a second time at the alignment stage.
    align_annotation_gtf = annotation_gtf if not genome_fasta else None

    # NOTE(review): STAR names its output "Aligned.sortedByCoord.out.bam" only
    # for coordinate-sorted BAM output; for other modes that file is not
    # expected to exist - confirm against the STAR.align wrapper.
    bam_is_indexable = output_type == "BAM" and sort_bam

    FileRoutines.safe_mkdir(output_dir)

    for sample in sample_list:
        print("Handling %s" % sample)

        sample_dir = "%s/%s/" % (samples_dir, sample)
        alignment_sample_dir = "%s/%s/" % (output_dir, sample)
        FileRoutines.safe_mkdir(alignment_sample_dir)

        # Only the forward/reverse file lists are needed here.
        _filetypes, forward_files, reverse_files = \
            FileRoutines.make_lists_forward_and_reverse_files(sample_dir)

        print("\tAligning reads...")
        STAR.align(genome_dir, forward_files,
                   reverse_read_list=reverse_files,
                   annotation_gtf=align_annotation_gtf,
                   feature_from_gtf_to_use_as_exon=feature_from_gtf_to_use_as_exon,
                   exon_tag_to_use_as_transcript_id=exon_tag_to_use_as_transcript_id,
                   exon_tag_to_use_as_gene_id=exon_tag_to_use_as_gene_id,
                   length_of_sequences_flanking_junction=length_of_sequences_flanking_junction,
                   junction_tab_file_list=junction_tab_file_list,
                   three_prime_trim=three_prime_trim,
                   five_prime_trim=five_prime_trim,
                   adapter_seq_for_three_prime_clip=adapter_seq_for_three_prime_clip,
                   max_mismatch_percent_for_adapter_trimming=max_mismatch_percent_for_adapter_trimming,
                   three_prime_trim_after_adapter_clip=three_prime_trim_after_adapter_clip,
                   output_type=output_type,
                   sort_bam=sort_bam,
                   max_memory_for_bam_sorting=max_memory_for_bam_sorting,
                   include_unmapped_reads_in_bam=include_unmapped_reads_in_bam,
                   output_unmapped_reads=output_unmapped_reads,
                   output_dir=alignment_sample_dir,
                   two_pass_mode=two_pass_mode,
                   max_intron_length=max_intron_length)

        if bam_is_indexable:
            print("\tIndexing bam file...")
            # alignment_sample_dir already ends with "/", so no extra
            # separator is inserted before the file name.
            resulting_bam_file = "%sAligned.sortedByCoord.out.bam" % alignment_sample_dir
            SamtoolsV1.index(resulting_bam_file)
        else:
            print("\tSkipping bam indexing: output is not a coordinate-sorted BAM")
sjdboverhang=None, genomeSAindexNbases=None, genomeChrBinNbits=None, genome_size=args.genome_size) sample_list = args.samples if args.samples else Pipeline.get_sample_list( args.samples_dir) FileRoutines.safe_mkdir(args.output_dir) for sample in sample_list: print("Handling %s" % sample) sample_dir = "%s/%s/" % (args.samples_dir, sample) alignment_sample_dir = "%s/%s/" % (args.output_dir, sample) FileRoutines.safe_mkdir(alignment_sample_dir) filetypes, forward_files, reverse_files = FileRoutines.make_lists_forward_and_reverse_files( sample_dir) print "\tAligning reads..." STAR.align( args.genome_dir, forward_files, reverse_read_list=reverse_files, annotation_gtf=args.annotation_gtf if not args.genome_fasta else None, feature_from_gtf_to_use_as_exon=None, exon_tag_to_use_as_transcript_id=None, exon_tag_to_use_as_gene_id=None, length_of_sequences_flanking_junction=None, junction_tab_file_list=args.junction_tab_file, three_prime_trim=None, five_prime_trim=None,