Ejemplo n.º 1
0
                    dest="label_ids",
                    help="Label ids by species names. Default - don't label")

# Labeling options: the separator string and the position of the label.
parser.add_argument("-g", "--separator_for_labeling",
                    dest="separator_for_labeling",
                    action="store",
                    default="@",
                    help="Separator to use for labeling. Default - '@'")
# store_false: passing -r/--label_last switches label_first off.
parser.add_argument("-r", "--label_last",
                    dest="label_first",
                    action="store_false",
                    default=True,
                    help="Place label at the end of id")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output_dir)

# Keyword settings forwarded unchanged to the extraction routine.
extraction_options = {
    "file_with_white_list_cluster_ids": args.white_list_ids,
    "mode": args.mode,
    "sequence_file_extension": args.seq_extension,
    "sequence_file_format": args.format,
    "label_species": args.label_ids,
    "separator_for_labeling": args.separator_for_labeling,
    "species_label_first": args.label_first,
}
SequenceClusterRoutines.extract_sequences_by_clusters(
    args.input_cluster_dir,
    args.input_seq_dir,
    args.output_dir,
    **extraction_options)
Ejemplo n.º 2
0
print("Drawing histograms...")

# Every stat file gets the same histogram; the path is passed twice to the
# plotting routine, exactly as before.
stat_files_to_plot = (
    output_evidence_stats,
    output_supported_stats,
    output_swissprot_pfam_or_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_or_hints_supported_transcripts_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_evidence,
)

# Plot settings shared by all histograms.
histogram_settings = dict(
    data_type=None,
    column_list=(2, ),
    comments="#",
    n_bins=20,
    title="Transcript support by hints",
    extensions=("png", "svg"),
    legend_location="upper center",
    stats_as_legend=True,
)

for stat_file in stat_files_to_plot:
    MatplotlibRoutines.percent_histogram_from_file(stat_file, stat_file,
                                                   **histogram_settings)
print("Creating final directories...")
if args.pfam_db and args.swissprot_db:
    # Local imports: only this branch needs them, and the top-of-file import
    # block is not visible from this section.
    import glob
    import shutil

    db_or_hints_dir = "supported_by_db_or_hints/"
    db_and_hints_dir = "supported_by_db_and_hints/"
    for directory in db_and_hints_dir, db_or_hints_dir:
        FileRoutines.safe_mkdir(directory)

    # Move the result files into their final directories without going
    # through a shell: the previous "mv <prefix>...*" command line broke on
    # output prefixes containing spaces or shell metacharacters and silently
    # ignored a failing exit status.
    for support_kind, final_dir in (
            ("swissprot_or_pfam_or_hints", db_or_hints_dir),
            ("swissprot_or_pfam_and_hints", db_and_hints_dir)):
        pattern = "%s.supported.transcripts.%s*" % (args.output, support_kind)
        for matched_path in glob.glob(pattern):
            # Explicit destination file name, so an existing file from an
            # earlier run is replaced (as "mv" did) instead of raising.
            shutil.move(matched_path,
                        os.path.join(final_dir,
                                     os.path.basename(matched_path)))
Ejemplo n.º 3
0
    "--min_species_number",
    action="store",
    dest="min_species_number",
    default=1,
    type=int,
    help="Minimum number of species with family to retain family. Default: 1")
parser.add_argument("-f", "--filtered_families_directory",
                    dest="filtered_family_dir",
                    action="store",
                    type=FileRoutines.check_path,
                    default="filtered_fam",
                    help="Directory to write filtered_families")
args = parser.parse_args()

FileRoutines.safe_mkdir(args.filtered_family_dir)
species_list = sorted(args.species_set)

# The two filters are mutually exclusive.
if args.white_list_file and args.black_list_file:
    raise ValueError("Black list and white list cant be set simultaneously")

# Both lists always exist; each is filled only when its file was supplied.
black_list, white_list = IdList(), IdList()
for id_list, list_file in ((black_list, args.black_list_file),
                           (white_list, args.white_list_file)):
    if list_file:
        id_list.read(list_file)

# NOTE(review): both handles are opened without a context manager;
# presumably they are closed further down the script - confirm.
out_fd = open(args.cafe_file, "w")
filtered_fd = open("%sfiltered_families.cafe" % args.filtered_family_dir, "w")

# Both output tables start with the same header line.
header_line = "FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list))
for table_fd in (out_fd, filtered_fd):
    table_fd.write(header_line)

species_filtered_fd_list = OrderedDict()
Ejemplo n.º 4
0
# Directory for the per-input top-hit files, and the flag to keep them.
parser.add_argument("-d", "--top_hits_dir",
                    dest="top_hits_dir",
                    action="store",
                    type=FileRoutines.check_path,
                    default="top_hits_dir/",
                    help="Directory to write intermediate(splited) output")
parser.add_argument("-r", "--retain_splited_output",
                    dest="retain",
                    action="store_true",
                    help="Retain splited output")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.top_hits_dir)


def handle_input(filename):
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")
Ejemplo n.º 5
0
                    action="store",
                    dest="max_memory_per_thread",
                    default="1G",
                    help="Maximum memory per thread. Default - 1G")
args = parser.parse_args()

# The preparation branch below runs for either prepare_bam or mix_ends and
# uses both the prepared-BAM prefix and the temp dir. Validate them for both
# triggers; checking prepare_bam alone let a mix_ends-only run continue and
# build "None.bam" / "None.unpaired.bam" paths.
bam_preparation_requested = args.prepare_bam or args.mix_ends
if bam_preparation_requested and not (args.prepared_bam_prefix
                                      and args.temp_dir):
    raise ValueError(
        "Options -e/--prepared_bam_prefix and -m/--temp_dir must be set "
        "if -p/--prepare_bam or mix_ends option is used")

SamtoolsV1.threads = args.threads

if bam_preparation_requested:
    FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir))
    prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix
    # A separate BAM for unpaired reads is requested only with mix_ends.
    if args.mix_ends:
        prepared_unpaired_bam_file = ("%s.unpaired.bam" %
                                      args.prepared_bam_prefix)
    else:
        prepared_unpaired_bam_file = None
    SamtoolsV1.prepare_bam_for_read_extraction(
        args.input,
        prepared_pe_bam_file,
        temp_file_prefix=args.temp_dir,
        max_memory_per_thread=args.max_memory_per_thread,
        bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file)
if args.paired:
Ejemplo n.º 6
0
                    dest="output",
                    type=FileRoutines.check_path,
                    help="Output directory")
#parser.add_argument("-p", "--convert_options", action="store", dest="convert_options",
#                    help="Options for convert")
parser.add_argument("-d", "--dont_make_negative",
                    dest="dont_negative",
                    action="store_true",
                    help="Dont make negative")

args = parser.parse_args()

# Intermediate directory that receives the output of the first conversion.
temp_dir = "temp/"
FileRoutines.safe_mkdir(temp_dir)

# Both converters get the same thread count.
for converter in (BioConvert, Convert):
    converter.threads = args.threads

BioConvert.parallel_convert(args.input, temp_dir)

if args.dont_negative:
    os.rename(temp_dir, args.output)

else:
    converted_files = os.listdir(temp_dir)
    converted_files = list(
        map(lambda s: "%s%s" % (temp_dir, s), converted_files))
    Convert.parallel_convert(
        converted_files, args.output,
Ejemplo n.º 7
0
STAR.path = args.star_dir

# STAR.index is called only when a genome FASTA was supplied.
if args.genome_fasta:
    index_options = dict(
        annotation_gtf=args.annotation_gtf,
        junction_tab_file=args.junction_tab_file,
        sjdboverhang=None,
        genomeSAindexNbases=None,
        genomeChrBinNbits=None,
        genome_size=args.genome_size,
    )
    STAR.index(args.genome_dir, args.genome_fasta, **index_options)

# Explicitly listed samples win; otherwise the list comes from the
# samples directory.
sample_list = args.samples or Pipeline.get_sample_list(args.samples_dir)

FileRoutines.safe_mkdir(args.output_dir)

for sample in sample_list:
    print("Handling %s" % sample)
    sample_dir = "%s/%s/" % (args.samples_dir, sample)
    alignment_sample_dir = "%s/%s/" % (args.output_dir, sample)
    FileRoutines.safe_mkdir(alignment_sample_dir)
    filetypes, forward_files, reverse_files, se_files = FileRoutines.make_lists_forward_and_reverse_files(
        sample_dir)

    print("\tAligning reads...")

    STAR.align_miRNA(
        args.genome_dir,
        se_files,
        output_dir=alignment_sample_dir,
Ejemplo n.º 8
0
args = parser.parse_args()

Trimmomatic.jar_path = args.path_to_trimmomatic_dir
Trimmomatic.threads = args.threads

# A comma-separated sample list wins; otherwise every entry of the samples
# directory is taken as a sample name.
if args.samples:
    samples = args.samples.split(",")
else:
    samples = os.listdir(args.samples_dir)

for sample in samples:
    print("Handling %s" % sample)

    sample_dir = "%s%s/" % (args.samples_dir, sample)

    sample_out_dir = "%s%s/" % (args.output_dir, sample)
    FileRoutines.safe_mkdir(sample_out_dir)
    trimmomatic_log = "%s/trimmomatic.log" % sample_out_dir
    trimmomatic_time_log = "%s/trimmomatic.time.log" % sample_out_dir
    output_prefix = "%s%s.TMF" % (sample_out_dir, sample)

    files_from_sample_dir = os.listdir(sample_dir)

    left_reads_file = None
    right_reads_file = None

    for filename in files_from_sample_dir:
        if ("_1.fq" in filename) or ("_1.fastq" in filename):
            left_reads_file = filename
        elif ("_2.fq" in filename) or ("_2.fastq" in filename):
            right_reads_file = filename
    if (left_reads_file is None) and (right_reads_file is None):
Ejemplo n.º 9
0
                    dest="blast_dir",
                    default="",
                    help="Directory with BLAST+ binaries")

args = parser.parse_args()

# Input file name rebuilt from elements 1 and 2 of the split result.
input_filename_list = FileRoutines.split_filename(args.input)
input_filename = input_filename_list[1] + input_filename_list[2]

# --- working directory paths ---
# NOTE(review): workdir_dir already ends with "/", so the path below contains
# "//"; left unchanged on purpose.
workdir_dir = "%s.transdecoder_dir/" % input_filename
pep_from_longest_orfs = "%s/longest_orfs.pep" % workdir_dir

# --- output directories of the two searches ---
hmmscan_dir = "hmmscan_vs_pfam/"
blastp_dir = "blastp_vs_uniref/"
for search_dir in (hmmscan_dir, blastp_dir):
    FileRoutines.safe_mkdir(search_dir)

# --- hmmscan layout ---
hmmscan_splited_fasta_dir = "%ssplited_fasta_dir/" % hmmscan_dir
splited_domtblout_dir = "%ssplited_domtblout_dir/" % hmmscan_dir

# NOTE(review): these two paths also contain "//" because hmmscan_dir ends
# with "/"; left unchanged on purpose.
hmmscan_vs_pfam_prefix = "%s.pfam" % input_filename
hmmscan_vs_pfam_output = "%s/%s.hits" % (hmmscan_dir, hmmscan_vs_pfam_prefix)
domtblout_outfile = "%s/%s.domtblout" % (hmmscan_dir, hmmscan_vs_pfam_prefix)

# --- blastp layout ---
# The hits file is defined only when a BLAST database was given.
if args.blast_database:
    blastp_outfile = "%s%s.blastp.hits" % (blastp_dir, input_filename)
else:
    blastp_outfile = None
blastp_split_dir = "%ssplited_fasta_dir/" % blastp_dir
# NOTE(review): unlike the other directory paths, this one has no trailing
# "/" - confirm that downstream code expects it that way.
blastp_splited_output_dir = "%ssplited_output_dir" % blastp_dir

HMMER3.path = args.hmmer_dir
HMMER3.threads = args.threads