def main():
    """Trim every alignment found in the input directory and write the results.

    Parses the command line with ``get_args()``, globs ``args.input`` for files
    ending in each extension returned by
    ``get_file_extensions(args.input_format)``, runs
    ``get_and_trim_alignments`` once per file (in a process pool when
    ``args.cores > 1``, serially otherwise) and passes the collected results
    to ``write_alignments_to_outdir``.

    Raises:
        AssertionError: if ``args.cores`` is larger than
            ``multiprocessing.cpu_count()``.
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args.verbosity, args.log_path)
    text = " Starting {} ".format(my_name)
    log.info(text.center(65, "="))
    alignments = []
    log.info("Getting aligned sequences for trimming")
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(); every file shares the same options
    package = [args.input_format, args.window, args.threshold, args.proportion, args.max_divergence, args.min_length]
    params = [(package, alignment) for alignment in alignments]
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # if --multiprocessing, use Pool.map(), else use map()
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        # NOTE(review): the pool gets one worker fewer than requested --
        # presumably to leave a core for the parent process; confirm.
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        finally:
            # always release the worker processes, even if a task raised
            pool.close()
            pool.join()
    else:
        # list() forces the work to happen here; on Python 3 a bare map() is
        # lazy and nothing would run until the results were written out
        alignments = list(map(get_and_trim_alignments, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
# Example no. 2
# 0
def main(args):
    """Align every locus from the input FASTA data and write the alignments.

    Builds the locus dictionary with ``get_fasta_dict``, pairs each
    ``(name, sequences)`` item with its own copy of the trimming options, runs
    ``align`` on every pair (in a process pool when ``args.cores > 1``,
    serially otherwise) and passes the collected results to
    ``write_alignments_to_outdir``.

    Args:
        args: parsed command-line namespace; this function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``cores``, ``output`` and
            ``output_format`` from it.

    Raises:
        AssertionError: if ``args.cores`` is larger than
            ``multiprocessing.cpu_count()``.
    """
    # setup logging
    log, my_name = setup_logging(args)
    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # one independent option list per locus
    opts = [[args.window, args.threshold, args.no_trim, args.proportion, args.max_divergence, args.min_length]
            for _ in loci]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need.  Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # always release the worker processes, even if a task raised
            pool.close()
            pool.join()
    else:
        # list() forces the work to happen here; on Python 3 a bare map() is
        # lazy and nothing would run until the results were written out
        alignments = list(map(align, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
# Example no. 3
# 0
def main(args):
    """Align every locus with MUSCLE or MAFFT, tidy record ids, write results.

    Selects the ``Align`` class matching ``args.aligner``, builds the locus
    dictionary with ``get_fasta_dict``, runs ``align`` on every
    ``(locus item, options)`` pair (in a process pool when ``args.cores > 1``,
    serially otherwise), shortens the id of every trimmed record and passes
    the results to ``write_alignments_to_outdir``.

    NOTE(review): ``log`` is not created in this function; it is assumed to be
    defined at module scope -- confirm.

    Args:
        args: parsed command-line namespace; this function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``cores``, ``output`` and
            ``output_format`` from it.

    Raises:
        ValueError: if ``args.aligner`` is neither ``"muscle"`` nor ``"mafft"``.
        AssertionError: if ``args.cores`` is larger than
            ``multiprocessing.cpu_count()``.
    """
    if args.aligner == "muscle":
        from phyluce.muscle import Align as align_class
    elif args.aligner == "mafft":
        from phyluce.mafft import Align as align_class
    else:
        # fail early and clearly instead of hitting an unbound ``align_class``
        raise ValueError(
            "Unsupported aligner: {!r} (expected 'muscle' or 'mafft')".format(args.aligner)
        )

    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # one independent option list per locus; the aligner class travels with it
    opts = [[args.window, args.threshold, args.no_trim, args.proportion, args.max_divergence, args.min_length, align_class]
            for _ in loci]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need.  Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # always release the worker processes, even if a task raised
            pool.close()
            pool.join()
    else:
        # list() is required: the results are iterated twice below (once to
        # rewrite ids, once to write files) and a lazy Python 3 map() would
        # already be exhausted by the time it reached the writer
        alignments = list(map(align, params))

    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    for _, alignment in alignments:
        if alignment.trimmed:
            for t in alignment.trimmed:
                # keep only what follows the third underscore of the record id
                # (ids with fewer than three underscores raise IndexError)
                t.id = t.id.split('_', 3)[3]
                t.name = t.id
                t.description = ''
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed! "
    log.info(text.center(65, "="))
def main(args):
    """Align every locus, clean the FASTA headers, write results and origin.

    Selects the ``Align`` class matching ``args.aligner``, builds the locus
    dictionary with ``get_fasta_dict``, runs ``align`` on every
    ``(locus item, options)`` pair (in a process pool when ``args.cores > 1``,
    serially otherwise), strips the locus-name prefix and the first ``_R_``
    marker from each trimmed record id, and passes the results to
    ``write_alignments_to_outdir``.  Finally it pickles ``args.sequences``
    into ``<args.output>/.secapr_files/sequence_origin.pickle``; that step is
    best-effort and only prints a message when it fails.

    NOTE(review): ``log`` is not created in this function; it is assumed to be
    defined at module scope -- confirm.

    Args:
        args: parsed command-line namespace; this function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``gap_opening_penalty``,
            ``gap_extension_penalty``, ``cores``, ``output``,
            ``output_format`` and ``sequences`` from it.

    Raises:
        ValueError: if ``args.aligner`` is neither ``"muscle"`` nor ``"mafft"``.
        AssertionError: if ``args.cores`` is larger than
            ``multiprocessing.cpu_count()``.
    """
    if args.aligner == "muscle":
        from phyluce.muscle import Align as align_class
    elif args.aligner == "mafft":
        from phyluce.mafft import Align as align_class
    else:
        # fail early and clearly instead of hitting an unbound ``align_class``
        raise ValueError(
            "Unsupported aligner: {!r} (expected 'muscle' or 'mafft')".format(
                args.aligner))

    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # one independent option list per locus; the aligner class travels with it
    opts = [[
        args.window, args.threshold, args.no_trim, args.proportion,
        args.max_divergence, args.min_length, args.gap_opening_penalty,
        args.gap_extension_penalty, align_class
    ] for _ in loci]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info(
        "Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)"
    )
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need.  Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(
        ), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # always release the worker processes, even if a task raised
            pool.close()
            pool.join()
    else:
        # list() is required: the results are iterated twice below (once to
        # rewrite ids, once to write files) and a lazy Python 3 map() would
        # already be exhausted by the time it reached the writer
        alignments = list(map(align, params))

    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    for _, alignment in alignments:
        if alignment.trimmed:
            for t in alignment.trimmed:
                # the locus name is the last '|'-separated field of the description
                locus_name = t.description.split('|')[-1]
                string_to_replace = '%s_' % str(locus_name)
                # fix the fasta header, also removing the occasional _R_ resulting from reverse contigs;
                # plain str.replace is used so that regex metacharacters in
                # a locus name are matched literally
                tmp = t.id.replace(string_to_replace, '', 1)
                t.id = tmp.replace('_R_', '', 1)
                t.name = t.id
                t.description = ''
    write_alignments_to_outdir(log, args.output, alignments,
                               args.output_format)
    # computed before the try so the failure message can always name the path
    pickle_path = os.path.join(args.output, '.secapr_files')
    try:
        if not os.path.exists(pickle_path):
            os.makedirs(pickle_path)
        with open(os.path.join(pickle_path, 'sequence_origin.pickle'),
                  'wb') as handle:
            pickle.dump(args.sequences,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
    except Exception:
        # best-effort bookkeeping: report and carry on, but let
        # KeyboardInterrupt / SystemExit propagate
        print('Could not pass origin of sequences to %s' % pickle_path)
    else:
        # end
        text = " Completed! "
        log.info(text.center(65, "="))