Exemple #1
0
def excise_precursors():
    """Excise potential precursor sequences from the genome.

    When option ``-a`` holds a truthy value, ``excise_precursors.py`` is run
    with the current ``stack_height_min``.  Otherwise
    ``excise_precursors_iterative_final.py`` is run and ``stack_height_min``
    is reloaded from the first line of ``<dir_tmp>/precursors.fa_stack``.

    Calls ``die`` if ``<dir_tmp>/precursors.fa`` is empty or is not a regular
    file once the external script has finished.

    Returns:
        0 once the excised precursor file has been checked.
    """
    global file_genome, parsed_arf, dir_tmp, stack_height_min, max_pres

    banner = "#excising precursors\n"
    pprint(banner)
    print_stderr(banner)

    start()

    fixed_stack_height = bool(options.get('-a'))
    if fixed_stack_height:
        cmd = "excise_precursors.py {} {}/{}_parsed.arf {}/precursors.coords -a {} > {}/precursors.fa\n\n".format(
            file_genome, dir_tmp, parsed_arf, dir_tmp, stack_height_min,
            dir_tmp)
    else:
        cmd = "excise_precursors_iterative_final.py {} {}/{}_parsed.arf {}/precursors.fa {}/precursors.coords {}\n".format(
            file_genome, dir_tmp, parsed_arf, dir_tmp, dir_tmp, max_pres)

    print_stderr(cmd)
    # read() blocks until the child process has finished; its stdout is
    # not used any further.
    os.popen(cmd).read()

    if not fixed_stack_height:
        # the iterative script leaves the stack height it used in a side file
        stack_file = open_or_die2('{}/precursors.fa_stack'.format(dir_tmp),
                                  'rb')
        stack_height_min = stack_file.readline().strip()
        stack_file.close()

    end()

    excised = '{}/precursors.fa'.format(dir_tmp)
    # must be non-empty AND a regular plain file
    if not (file_s(excised) and os.path.isfile(excised)):
        die("No precursors excised\n")

    return 0
def resolve_potential_precursor():
    '''
    Dissect the current potential precursor (``db_old``) into its parts via
    the ``fill_*`` helpers, print its identifier if it passes the initial
    filter, and finally reset the per-precursor state.

    The function has no return value.
    '''
    global hash_seq, db_old, hash_struct

    # Each helper is called in this fixed order before filtering.
    fill_structure()
    fill_pri()
    fill_mature()
    fill_star()
    fill_loop()

    viable = pass_filtering_initial()
    if viable:
        # The sequence and structure are looked up although they are not
        # used afterwards; the lookups are kept so that a missing entry
        # still surfaces as a KeyError.
        seq, struct = hash_seq[db_old], hash_struct[db_old]

        pprint("{}\n".format(db_old))

    # always clear the state, whether or not the precursor was reported
    reset_variables()
Exemple #3
0
def perform_controls():
    """Run the permuted controls.

    Builds the ``core_algorithm.py`` command line (adding ``-s`` when a
    mature reference for other species is given and ``-y`` unless option
    ``-c`` equals ``''``), stores it in ``<dir_tmp>/command_line`` through a
    shell ``echo`` and then runs ``perform_controls.py`` on it, writing
    ``<dir_tmp>/output_permuted.mrd``.
    """
    global dir_tmp, _dir, file_mature_ref_other_species, ltime

    notice = "#running permuted controls\n"
    pprint(notice)
    print_stderr(notice)
    start()

    no_other_species = re.search('none', file_mature_ref_other_species,
                                 re.IGNORECASE)
    if no_other_species:
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -v -50".format(
            dir_tmp, dir_tmp)
    else:
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -s {}/{} -v -50".format(
            dir_tmp, dir_tmp, dir_tmp, file_mature_ref_other_species)

    # the randfold results are only passed on when option -c is not ''
    if options.get('-c') != '':
        line += " -y {}/precursors_for_randfold.rand".format(dir_tmp)

    # write the command line that perform_controls.py will execute
    store_cmd = "echo '{} > {}/output.mrd' > {}/command_line\n\n".format(
        line, _dir, dir_tmp)
    print_stderr(store_cmd)
    os.system(store_cmd)

    controls_cmd = "perform_controls.py {}/command_line {}/precursors.str 100 -a > {}/output_permuted.mrd 2>>error_{}.log\n\n".format(
        dir_tmp, dir_tmp, dir_tmp, ltime)
    print_stderr(controls_cmd)
    os.system(controls_cmd)
    end()
Exemple #4
0
def compute_randfold():
    """Compute randfold p-values for the subset of precursors which are
    plausible Dicer substrates.

    Does nothing when option ``-c`` equals ``''``.  Otherwise three shell
    commands are run in sequence: ``select_for_randfold.py`` picks the ids,
    ``fastaselect.py`` extracts their sequences and ``randfold`` scores them.
    All intermediate and result files live in ``dir_tmp``.
    """
    global options, dir_tmp
    if options.get('-c') == '':
        return

    heading = "#computing randfold p-values\n"
    pprint(heading)
    print_stderr(heading)

    # Step 1: the command is logged before the timer is started.
    select_cmd = "select_for_randfold.py {}/signature.arf {}/precursors.str > {}/precursors_for_randfold.ids\n\n".format(
        dir_tmp, dir_tmp, dir_tmp)
    print_stderr(select_cmd)
    start()
    os.system(select_cmd)
    end()

    # Steps 2 and 3: the timer is started first, then the command is logged.
    remaining_steps = (
        ("fastaselect.py {}/precursors.fa {}/precursors_for_randfold.ids > {}/precursors_for_randfold.fa\n\n",
         3),
        ("randfold -s {}/precursors_for_randfold.fa 99 > {}/precursors_for_randfold.rand\n\n",
         2),
    )
    for template, n_dirs in remaining_steps:
        start()
        step_cmd = template.format(*([dir_tmp] * n_dirs))
        print_stderr(step_cmd)
        os.system(step_cmd)
        end()
def perform_controls(_dir, rounds, command_line, options):
    """Run ``command_line`` on ``rounds`` freshly permuted structure files.

    For every round the module-level ``file_structure`` is permuted with
    ``permute_structure.py`` into ``<_dir>/precursors_permuted.str`` and
    ``command_line`` is executed through the shell with stderr discarded.

    Args:
        _dir: scratch directory; it is created here and must not exist yet
            (``os.mkdir`` raises otherwise).  It is removed again before
            returning, also when a round fails or is interrupted.
        rounds: number of permutation rounds (rounds are counted from 1).
        command_line: shell command executed once per round.
        options: option mapping; progress is reported on stderr when
            ``options.get('-a') == ''``.
    """
    os.mkdir(_dir)
    show_progress = options.get('-a') == ''

    try:
        _round = 1
        while _round <= rounds:
            if show_progress:
                print_stderr('{}\r'.format(_round))

            cmd = 'permute_structure.py {} > {}/precursors_permuted.str 2> /dev/null'.format(
                file_structure, _dir)
            os.system(cmd)

            pprint('permutation {}\n\n'.format(_round))
            cmd = '{} 2> /dev/null'.format(command_line)
            os.system(cmd)

            _round += 1
    finally:
        # never leave the scratch directory behind, even on error/interrupt
        shutil.rmtree(_dir)

    if show_progress:
        print_stderr('controls performed\n\n')
Exemple #6
0
def survey_known(score):
    '''
    Print summary statistics for mature miRNAs at the given score cut-off.

    Four tab-led fields are written with ``pprint``: the number of entries
    in ``hash_ref`` (mature miRNAs in the reference file), the number of
    entries in ``hash_sig`` (matures present in the data), how many of the
    latter have a value of at least ``score`` (matures recovered) and that
    count as a percentage of the matures present.
    '''
    global hash_ref, hash_sig

    # mature miRNAs for the species in reference (miRBase) file
    pprint('\t{}'.format(len(hash_ref)))

    # matures present in data
    present_cnt = len(hash_sig)
    pprint("\t{}".format(present_cnt))

    # matures recovered: those whose stored score reaches the cut-off
    recovered_cnt = sum(1 for sig_score in hash_sig.values()
                        if score <= sig_score)
    pprint("\t{}".format(recovered_cnt))

    # matures recovered in percent
    recovered_fraction = round_decimals(div(recovered_cnt, present_cnt), 2)
    pprint(" ({:.0f}%)".format(100 * recovered_fraction))
Exemple #7
0
def core_algorithm():
    '''
    Run the moRNA Finder core algorithm.

    Builds the ``core_algorithm.py`` command line (``-s`` when a mature
    reference for other species is given, ``-y`` unless option ``-c``
    equals ``''``, ``-l`` with the longest identifier of the this-species
    reference, default 40) and writes its output to ``<_dir>/output.mrd``.

    When option ``-E`` is given the command is run a second time with
    ``-t`` into ``<_dir>/error.output.mrd``.  If ``output.mrd`` ends up
    empty, a ``-t`` debug run is written to ``error.output.mrd_<ltime>``
    and ``die`` is called.
    '''
    global _dir, dir_tmp, file_mature_ref_other_species, ltime
    global file_mature_ref_this_species
    pprint("#running moRNA Finder core algorithm\n")
    print_stderr("#running moRNA Finder core algorithm\n")

    longest_id = 40
    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        longest_id = get_longest_id("{}/{}".format(
            dir_tmp, file_mature_ref_this_species))

    start()

    if not re.search('none', file_mature_ref_other_species, re.IGNORECASE):
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -s {}/{} -v -50 -l {}".format(
            dir_tmp, dir_tmp, dir_tmp, file_mature_ref_other_species,
            longest_id)
    else:
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -v -50 -l {}".format(
            dir_tmp, dir_tmp, longest_id)

    if not options.get('-c') == '':
        line += " -y {}/precursors_for_randfold.rand".format(dir_tmp)

    cmd = "{} > {}/output.mrd\n".format(line, _dir)
    print_stderr(cmd)
    os.system(cmd)

    # Value-less switches are compared against '' everywhere else in this
    # file, so a plain truthiness test would never trigger for a switch
    # that is stored as ''.  Accept both representations.
    debug_flag = options.get('-E')
    if debug_flag == '' or debug_flag:
        os.system('{} -t > {}/error.output.mrd'.format(line, _dir))

    end()

    # check if file is empty
    fname = "{}/output.mrd".format(_dir)
    if not file_s(fname):
        print_stderr("Error:\n\tFile {} is empty\n\n".format(fname))
        print_stderr(
            "Now running core_algorithm.py with option -t to see why all precursors were discarded\n"
        )
        os.system('{} -t > error.output.mrd_{}'.format(line, ltime))
        print_stderr(
            "The debug file is called error.output.mrd_{}\n".format(ltime))
        die("\nExiting now\n\n")
Exemple #8
0
def parse_mappings():
    """Filter the reads-vs-genome mappings with ``parse_mappings.py``.

    Parses mappings to retain only perfect mappings of reads
    18 nt <= length <= 25 nt that map perfectly to five loci or less.
    The result is written to ``<dir_tmp>/<parsed_arf>_parsed.arf``.

    Returns:
        0, unconditionally.
    """
    global file_reads_vs_genome, parsed_arf, dir_tmp

    announcement = "#parsing genome mappings\n"
    pprint(announcement)
    print_stderr(announcement)

    cmd = "parse_mappings.py {} -a 0 -b 18 -c 25 -i 5 > {}/{}_parsed.arf\n\n".format(
        file_reads_vs_genome, dir_tmp, parsed_arf)
    print_stderr(cmd)

    start()
    # read() waits for the command to finish; its (redirected) stdout is
    # not needed here.
    os.popen(cmd).read()
    end()

    return 0
Exemple #9
0
def parse_fasta(file_fasta):
    """Print a FASTA file with sequences upper-cased and U replaced by T.

    Header lines (starting with ``>``) are reduced to the identifier up to
    the first whitespace and printed followed by a newline.  Every other
    line is printed upper-cased with each ``U`` turned into ``T``.

    The line is upper-cased *before* the substitution so that a lowercase
    ``u`` also ends up as ``T`` (substituting first would leave it as ``U``).

    Args:
        file_fasta: path of the FASTA file to read.
    """
    FASTA = open_or_die(file_fasta, 'rb',
                        'can not open {}\n'.format(file_fasta))
    try:
        for line in FASTA:
            m = re.match(r'^(>\S+)', line)
            if m:
                pprint('{}\n'.format(m.group(1)))
            else:
                pprint('{}'.format(line.upper().replace('U', 'T')))
    finally:
        # release the handle even if printing or matching fails
        FASTA.close()
    return
Exemple #10
0
def survey_signal_to_noise(score):
    """Print the estimated signal-to-noise ratio for a score cut-off.

    The ratio is the total number of hairpins reported at ``score`` divided
    by the mean number of false positives returned by ``mean_sd`` (its first
    element), rounded to one decimal.  When that mean is 0 the ratio is
    reported as 0.  The value is written with ``pprint`` preceded by a tab.
    """
    # total hairpins reported
    hairpins_total = hairpins_cnt("total", score)

    # only the mean number of false positives is needed here
    hairpins_total_fp_mean = mean_sd("total", score, hairpins_total)[0]

    if hairpins_total_fp_mean == 0:
        # avoid dividing by a zero false-positive estimate
        signal_to_noise_total = 0
    else:
        signal_to_noise_total = round_decimals(
            div(hairpins_total, hairpins_total_fp_mean), 1)

    pprint("\t{}".format(signal_to_noise_total))
Exemple #11
0
def fold_precursors():
    '''
    Predict RNA secondary structures of the excised precursors with RNAfold.

    RNAfold is first invoked with ``-noPS`` (stderr appended to
    ``error_<ltime>.log``).  If that returns a non-zero status it is invoked
    again with ``--noPS``; if the second call fails too, ``die`` is called
    with the status.  Output goes to ``<dir_tmp>/precursors.str``.
    '''
    global dir_tmp, ltime

    title = "#folding precursors\n"
    pprint(title)
    print_stderr(title)

    logged_cmd = "RNAfold < {}/precursors.fa -noPS > {}/precursors.str\n\n".format(
        dir_tmp, dir_tmp)
    print_stderr(logged_cmd)

    start()

    first_try = "RNAfold < {}/precursors.fa -noPS > {}/precursors.str 2>>error_{}.log".format(
        dir_tmp, dir_tmp, ltime)
    status = os.system(first_try)

    if status:
        # retry with the double-dash spelling of the option
        second_try = "RNAfold < {}/precursors.fa --noPS > {}/precursors.str".format(
            dir_tmp, dir_tmp)
        status = os.system(second_try)

    if status:
        die("Some RNAfold error occurred. Error {}\n".format(status))

    end()
Exemple #12
0
def make_survey():
    """Produce the accuracy overview ``<_dir>/survey.csv`` with ``survey.py``.

    ``survey.py`` always receives ``output.mrd``, the permuted output
    (``-a``) and ``stack_height_min`` (``-d``).  The mature reference of
    this species (``-b``) and the signature file (``-c``) are only passed
    when a reference file is given.
    """
    global _dir, dir_tmp, file_mature_ref_this_species, stack_height_min

    headline = "#doing survey of accuracy\n"
    pprint(headline)
    print_stderr(headline)

    reference_missing = re.search('none', file_mature_ref_this_species,
                                  re.IGNORECASE)
    if reference_missing:
        cmd = "survey.py {}/output.mrd -a {}/output_permuted.mrd -d {} > {}/survey.csv\n\n".format(
            _dir, dir_tmp, stack_height_min, _dir)
    else:
        cmd = "survey.py {}/output.mrd -a {}/output_permuted.mrd -b {}/{} -c {}/signature.arf -d {} > {}/survey.csv\n\n".format(
            _dir, dir_tmp, dir_tmp, file_mature_ref_this_species, dir_tmp,
            stack_height_min, _dir)

    # logging, timing and execution are the same for both variants
    print_stderr(cmd)
    start()
    os.system(cmd)
    end()
Exemple #13
0
def prepare_signature():
    '''
    Prepare the signature file ``<dir_tmp>/signature.arf``.

    Runs ``prepare_signature.py`` on the reads and the excised precursors.
    The mature reference of this species is passed with ``-a`` when one is
    given.  The command is logged to stderr in both cases, and stderr of the
    command itself is appended to ``error_<ltime>.log``.

    Returns:
        0, unconditionally.
    '''
    global file_reads, dir_tmp, read_align_mismatches, file_mature_ref_this_species, ltime
    pprint("#preparing signature\n")
    print_stderr("#preparing signature\n")

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        cmd = "prepare_signature.py {} {}/precursors.fa {} -a {}/{} -o {}/signature.arf 2>>error_{}.log\n\n".format(
            file_reads, dir_tmp, read_align_mismatches, dir_tmp,
            file_mature_ref_this_species, dir_tmp, ltime)
    else:
        cmd = "prepare_signature.py {} {}/precursors.fa {} -o {}/signature.arf 2>>error_{}.log\n\n".format(
            file_reads, dir_tmp, read_align_mismatches, dir_tmp, ltime)

    # log the command for both variants, like every other pipeline step
    print_stderr(cmd)
    start()
    # read() blocks until prepare_signature.py has finished
    os.popen(cmd).read()
    end()

    return 0
Exemple #14
0
def survey(options):
    """Print one survey row per score cut-off, from 10 down to -10.

    Each row starts with the score.  With ``-b`` the hairpin and known-miRNA
    statistics follow, with ``-a`` the signal-to-noise estimate, and with
    ``-d`` the value of that option.  Every row ends with a newline.

    Args:
        options: option mapping queried for ``-a``, ``-b`` and ``-d``.
    """
    for score in reversed(range(-10, 11)):
        pprint(score)

        if options.get('-b'):
            survey_hairpins(score)

            survey_known(score)

        if options.get('-a'):
            survey_signal_to_noise(score)

        read_stack_min = options.get('-d')
        if read_stack_min:
            pprint('\t{}'.format(read_stack_min))

        pprint('\n')
Exemple #15
0
def survey_hairpins(score):
    '''
    Print summary statistics for miRNA hairpin precursors at ``score``.

    The number of novel hairpins is always printed.  With option ``-a`` the
    estimated false positives, estimated true positives and the percentage
    of true positives follow, each as "mean +/- sd" rounded to integers.
    All estimates are reported as 0 when no novel hairpin was counted.
    '''

    # partitioning of hairpins into known and novel; the known count is
    # requested as well although it is not printed here
    hairpins_known = hairpins_cnt("known", score)
    hairpins_novel = hairpins_cnt("novel", score)

    pprint('\t{}'.format(hairpins_novel))

    # estimation of false positives for the set of novel hairpins
    if not options.get('-a'):
        return

    fp_mean = fp_sd = true_mean = true_sd = pct_mean = pct_sd = 0
    if hairpins_novel:  # check if novel hairpins detected
        (fp_mean, fp_sd, true_mean, true_sd, pct_mean,
         pct_sd) = mean_sd("novel", score, hairpins_novel)

    fp_mean_round = round_decimals(fp_mean, 0)
    fp_sd_round = round_decimals(fp_sd, 0)
    pprint("\t{} +/- {}".format(fp_mean_round, fp_sd_round))

    pprint("\t{}".format(round_decimals(true_mean, 0)))
    pprint(" +/- {}".format(round_decimals(true_sd, 0)))

    pprint(" ({}".format(round_decimals(pct_mean, 0)))
    pprint(" +/- {}%)".format(round_decimals(pct_sd, 0)))
Exemple #16
0
def print_header(options):
    """Print the tab-separated header line of the survey table.

    The score column and the three known-miRNA columns are always printed.
    The novel-miRNA column depends on ``-b``, the false/true positive and
    signal-to-noise columns on ``-a``, and the excision gearing column on
    ``-d``.

    Args:
        options: option mapping queried for ``-a``, ``-b`` and ``-d``.
    """
    with_controls = options.get('-a')

    columns = ["moRNA Finder score"]

    if options.get('-b'):
        columns.append("\tnovel miRNAs reported by moRNA Finder")

    if with_controls:
        columns.append("\tnovel miRNAs, estimated false positives")
        columns.append("\tnovel miRNAs, estimated true positives")

    columns.extend([
        "\tknown miRNAs in species",
        "\tknown miRNAs in data",
        "\tknown miRNAs detected by moRNA Finder",
    ])

    if with_controls:
        columns.append("\testimated signal-to-noise")

    if options.get('-d'):
        columns.append("\texcision gearing")

    columns.append("\n")

    # one pprint call per field, in column order
    for column in columns:
        pprint(column)
def parse_file_arf(file_arf, options):
    """Filter the mappings of an arf file and print or tally the survivors.

    Every line with 13 whitespace-separated arf fields is counted in the
    global ``running`` and then passed through the filters selected in
    ``options``:

    * ``-j`` equal to ``''``: trailing nucleotides are trimmed first with
      ``remove_trailing_nts``
    * ``-a``: maximum number of edits
    * ``-b`` / ``-c``: minimum / maximum mapped query length
    * ``-d`` / ``-e``: query must be in ``hash_queries_incl`` / must not be
      in ``hash_queries_excl``
    * ``-f`` / ``-g``: the same for the db id and ``hash_dbs_incl`` /
      ``hash_dbs_excl``

    Unless ``-h`` equals ``''`` or ``-i`` is truthy, a surviving line is
    printed immediately.  Otherwise it is either tallied in ``hash_edits``
    (when the global ``gscan`` is truthy) or printed only if
    ``evaluate_query`` accepts it.  Lines that do not match the arf pattern
    are skipped silently.

    Args:
        file_arf: path of the arf mapping file.
        options: option mapping as described above.
    """
    global running, gscan, hash_edits

    arf_pattern = r'^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)'
    arf_row = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'

    FILE_ARF = open_or_die(file_arf, 'rb',
                           'can not open {}\n'.format(file_arf))
    try:
        for line in FILE_ARF:
            m = re.match(arf_pattern, line)
            if not m:
                continue

            m = m.groups()
            query = m[0]
            query_map_lng = int(m[1])
            query_beg = m[2]  # kept as text, only echoed back
            query_end = int(m[3])
            query_seq = m[4]
            db = m[5]
            db_map_lng = int(m[6])
            db_beg = m[7]  # kept as text, only echoed back
            db_end = int(m[8])
            db_seq = m[9]
            strand = m[10]
            edits = int(m[11])
            edit_string = m[12]

            running += 1
            if options.get('-j') == '':
                (query_map_lng, query_end, query_seq, db_map_lng, db_end,
                 db_seq, edits, edit_string) = remove_trailing_nts(
                     query_map_lng, query_end, query_seq, db_map_lng, db_end,
                     db_seq, edits, edit_string)

            if '-a' in options and int(options.get('-a')) < edits:
                continue

            if options.get('-b') and query_map_lng < int(options.get('-b')):
                continue

            if options.get('-c') and int(options.get('-c')) < query_map_lng:
                continue

            if options.get('-d') and query not in hash_queries_incl:
                continue

            if options.get('-e') and query in hash_queries_excl:
                continue

            if options.get('-f') and db not in hash_dbs_incl:
                continue

            if options.get('-g') and db in hash_dbs_excl:
                continue

            if not (options.get('-h') == '' or options.get('-i')):
                # no per-query evaluation requested: print straight away
                pprint(arf_row.format(query, query_map_lng, query_beg,
                                      query_end, query_seq, db, db_map_lng,
                                      db_beg, db_end, db_seq, strand, edits,
                                      edit_string))
                continue

            if gscan:
                # scanning mode: only count mappings per query and edit count
                create_hash_key_chain(hash_edits, 0, query, edits)
                hash_edits[query][edits] += 1
            else:
                evaluation = evaluate_query(query, edits, options)
                if evaluation:
                    pprint(arf_row.format(query, query_map_lng, query_beg,
                                          query_end, query_seq, db,
                                          db_map_lng, db_beg, db_end, db_seq,
                                          strand, edits, edit_string))
    finally:
        # the handle was never closed before; release it in every case
        FILE_ARF.close()
Exemple #18
0
def test_input_files():
    """Validate all input files and optionally pre-quantify known miRNAs.

    The function works in two stages:

    1. A quick pre-check of the first two lines of the reads, genome,
       mapping, mature-reference and precursor files (FASTA header,
       allowed sequence characters, arf layout), plus a check that every
       reference id used in the mapping file is an id of the genome file.
    2. Stringent checks through the external ``sanity_check_*.py`` scripts.
       Any output of such a script is treated as an error.

    Reference files given as ``none`` (in any letter case) are skipped.
    Every failed check ends in ``die``.

    When a precursor file and a mature reference for this species are both
    given, ``quantifier.py`` is run and ``options['-q']`` is set to the
    resulting ``miRBase.mrd`` path.
    """
    global file_reads, file_reads_vs_genome, file_genome, file_precursors, minpreslen, file_mature_ref_other_species, file_mature_ref_this_species

    # ---- reads file --------------------------------------------------
    IN = open_or_die2(file_reads, 'rb')
    line = IN.readline().strip()
    if not re.search(r'^>\S+', line):
        printErr()
        die("The first line of file {} does not start with '>identifier'\nReads file {} is not a valid fasta file\n\n"
            .format(file_reads, file_reads))

    if re.search(r'\s', line):
        printErr()
        die('File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nReads file {} is not a fasta file\n\n'
            .format(file_reads, file_reads))

    line = IN.readline()
    if not re.search(r'^[ACGTUNacgtun]*$', line):
        printErr()
        die('File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nReads file {} is not a fasta file\n\n'
            .format(file_reads, file_reads))

    IN.close()

    # ---- genome file -------------------------------------------------
    IN = open_or_die2(file_genome, 'rb')
    line = IN.readline().strip()
    if not re.search(r'>\S+', line):
        printErr()
        die("The first line of file {} does not start with '>identifier'\nGenome file {} is not a fasta file\n\n"
            .format(file_genome, file_genome))

    if re.search(r'\s', line):
        printErr()
        die('Genome file {} has not allowed whitespaces in its first identifier\n\n'
            .format(file_genome))

    # get genome ids
    tmps = os.popen('grep ">" {}'.format(file_genome)).read().strip()
    genomeids = dict(map(lambda x: (x, 1), re.split("\n", tmps)))

    line = IN.readline()
    if not re.search(r'^[ACGTUNacgtun]*$', line):
        printErr()
        die('File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nGenome file {} is not a fasta file\n\n'
            .format(file_genome, file_genome))

    IN.close()

    # ---- mapping (arf) file -------------------------------------------
    IN = open_or_die2(file_reads_vs_genome, 'rb')
    line = IN.readline()
    if not re.search(
            r'^(\S+_x\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+-])\s+(\d+)\s*([mDIM]*)$',
            line):
        printErr()
        die('Mapping file {} is not in arf format\n\nEach line of the mapping file must consist of the following fields\nreadID_wo_whitespaces  length  start  end read_sequence genomicID_wo_whitspaces length  start   end     genomic_sequence  strand  #mismatches editstring\nThe editstring is optional and must not be contained\nThe readID must end with _xNumber and is not allowed to contain whitespaces.\nThe genomeID is not allowed to contain whitespaces.'
            .format(file_reads_vs_genome))

    IN.close()

    # get ids from arf file and compare them with ids from the genome file
    tmps = os.popen(
        'cut -f6 {}|sort -u'.format(file_reads_vs_genome)).read().strip()
    for s in re.split("\n", tmps):
        if not genomeids.get(">{}".format(s)):
            die("The mapped reference id {} from file {} is not an id of the genome file {}\n\n"
                .format(s, file_reads_vs_genome, file_genome))

    # ---- mature reference, this species --------------------------------
    # 'none' is matched case-insensitively, as in the other pipeline steps
    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        IN = open_or_die2(file_mature_ref_this_species, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nmiRNA reference this species file {} is not a fasta file\n\n"
                .format(file_mature_ref_this_species,
                        file_mature_ref_this_species))

        if re.search(r'\s', line):
            printErr()
            die("miRNA reference this species file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_mature_ref_this_species))

        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nmiRNA reference this species file {} is not a fasta file\n\n"
                .format(file_mature_ref_this_species,
                        file_mature_ref_this_species))

        IN.close()

    # ---- mature reference, other species -------------------------------
    if not re.search('none', file_mature_ref_other_species, re.IGNORECASE):
        IN = open_or_die2(file_mature_ref_other_species, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nmiRNA reference other species file {} is not a fasta file\n\n"
                .format(file_mature_ref_other_species,
                        file_mature_ref_other_species))

        if re.search(r'\s', line):
            printErr()
            die("miRNA reference other species file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_mature_ref_other_species))

        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nmiRNA reference other species file {} is not a fasta file\n\n"
                .format(file_mature_ref_other_species,
                        file_mature_ref_other_species))

        IN.close()

    # ---- precursor file -------------------------------------------------
    if not re.search('none', file_precursors, re.IGNORECASE):
        IN = open_or_die2(file_precursors, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nprecursor file {} is not a fasta file\n\n"
                .format(file_precursors, file_precursors))

        if re.search(r'\s', line):
            printErr()
            die("precursor file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_precursors))

        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nprecursor file {} is not a fasta file\n\n"
                .format(file_precursors, file_precursors))

        if len(line) < minpreslen:
            printErr()
            die("The precursor file {} does not contain sequences of at least {} nt\nPlease make sure that you provided the correct file and the correct parameter ordering when calling {}\nIf you have precursors with less than {} please use option -p <int> to specify this length\n"
                .format(file_precursors, minpreslen, sys.argv[0], minpreslen))

        IN.close()

    # #################################################
    # precheck finished
    # #################################################

    # do stringent testing of all input files
    pprint("#testing input files\n")
    print_stderr("#testing input files\n")

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        start()
        cmd = "sanity_check_mature_ref.py {} 2>&1\n\n".format(
            file_mature_ref_this_species)
        print_stderr(cmd)
        ret_file_mature_ref_this_species = os.popen(cmd).read().strip()

        # any output from a sanity check script is an error report
        if ret_file_mature_ref_this_species:
            printErr()
            die("problem with {} {}\n".format(
                file_mature_ref_this_species,
                ret_file_mature_ref_this_species))
        end()

    if not re.search(r'none', file_mature_ref_other_species, re.IGNORECASE):
        start()

        cmd = "sanity_check_mature_ref.py {} 2>&1\n\n".format(
            file_mature_ref_other_species)
        print_stderr(cmd)
        ret_file_mature_ref_other_species = os.popen(cmd).read().strip()

        if ret_file_mature_ref_other_species:
            printErr()
            die("problem with {} {}\n".format(
                file_mature_ref_other_species,
                ret_file_mature_ref_other_species))
        end()

    cmd = "sanity_check_reads_ready_file.py {} 2>&1\n\n".format(file_reads)
    print_stderr(cmd)
    start()
    ret_test_file_reads = os.popen(cmd).read().strip()

    if ret_test_file_reads:
        printErr()
        die("problem with {} {}\n".format(file_reads, ret_test_file_reads))

    end()

    start()
    cmd = "sanity_check_genome.py {} 2>&1;\n\n".format(file_genome)
    print_stderr(cmd)
    ret_test_file_genome = os.popen(cmd).read().strip()

    if ret_test_file_genome:
        printErr()
        die("problem with {} {}\n".format(file_genome, ret_test_file_genome))

    end()
    start()

    cmd = "sanity_check_mapping_file.py {} 2>&1".format(file_reads_vs_genome)
    print_stderr(cmd)
    ret_test_file_reads_genome = os.popen(cmd).read().strip()

    if ret_test_file_reads_genome:
        printErr()
        die("problem with {} {}\n".format(file_reads_vs_genome,
                                          ret_test_file_reads_genome))

    end()

    if not re.search('none', file_precursors, re.IGNORECASE):
        start()

        cmd = "sanity_check_mature_ref.py {} 2>&1".format(file_precursors)
        print_stderr(cmd)
        ret_file_precursors = os.popen(cmd).read().strip()

        if ret_file_precursors:
            printErr()
            die("problem with {} {}\n".format(file_precursors,
                                              ret_file_precursors))

        end()

        start()
        if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
            print_stderr("Quantitation of expressed miRNAs in data\n\n\n")

            species = ''
            if options.get('-t'):
                species = "-t {}".format(options.get('-t'))

            file_star = ''
            if options.get('-s'):
                if file_s(options.get('-s')):
                    file_star = "-s {}".format(options.get('-s'))
                else:
                    print_stderr(
                        "File {} specified by option -s is empty or not found\n"
                        .format(options.get('-s')))
                    # disable the option so that later steps ignore it
                    options['-s'] = 0

            print("#Quantitation of known miRNAs in data\n")
            dopt = ""
            Popt = ""
            if options.get('-d') == '':
                dopt = "-d"
            if options.get('-P') == '':
                Popt = "-P"

            quant = "quantifier.py -p {} -m {} -r {} {} {} -y {} -k {} {}".format(
                file_precursors, file_mature_ref_this_species, file_reads,
                file_star, species, ltime, dopt, Popt)
            print_stderr(quant, "\n")
            os.system(quant)
            # remember where the quantifier left its result
            options[
                '-q'] = "expression_analyses/expression_analyses_{}/miRBase.mrd".format(
                    ltime)

            end()
        else:
            print_stderr(
                "Pre-quantitation is skipped caused by missing file with known miRNAs\n\n\n"
            )

    else:
        print_stderr(
            "Pre-quantitation is skipped caused by missing file with known precursor miRNAs\n\n\n"
        )
Exemple #19
0
def output_results():
    '''
    Make the final results html file by running ``make_html.py``.

    The command line is assembled from the pipeline outputs
    (``output.mrd``, ``precursors.coords``, ``survey.csv``) and from the
    user options:

    * ``-b``: score cut-off passed as ``-v`` (default 0)
    * ``-q``: quantifier result passed as ``-q`` together with ``-x``
    * ``-t``: species passed as ``-t``
    * ``-o``: when given, ``-o`` is NOT passed to ``make_html.py``
    * ``-E`` / ``-d``: forwarded as ``-E`` / ``-d`` when given

    Value-less switches count as given when they are stored as ``''`` (the
    convention used for ``-E`` here and for ``-d`` elsewhere in this file)
    or as any truthy value.
    '''
    global options, dir_tmp, ltime, version, scripts, file_mature_ref_this_species

    def switch_given(name):
        # A switch without argument may be stored as ''; a plain truthiness
        # test would then treat it as absent.
        value = options.get(name)
        return value == '' or bool(value)

    pprint("#producing graphic results\n")
    print_stderr("#producing graphic results\n")
    start()

    # sort aligned reads in pdf not by sample if option -o is given
    sort_by_sample = '-o'
    if switch_given('-o'):
        sort_by_sample = ''

    line = None

    # choose file to use for counting miRNAs in data
    xopt = "{}/signature.arf".format(dir_tmp)
    if os.path.isfile(
            "expression_analyses/expression_analyses_{}/miRNA_expressed.csv".
            format(ltime)):
        xopt = "expression_analyses/expression_analyses_{}/miRNA_expressed.csv".format(
            ltime)

    sc = 0
    if options.get('-b'):
        sc = options.get('-b')

    OE = ""
    if switch_given('-E'):
        OE = " -E"

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        if options.get('-q'):
            line = "make_html.py -f {}/output.mrd -k {}/{} -p {}/precursors.coords -s {}/survey.csv -c -e -q {} -x {} -r {}Rfam_for_moR.fa -v {} -y {} {} {}".format(
                _dir, dir_tmp, file_mature_ref_this_species, dir_tmp, _dir,
                options.get('-q'), xopt, scripts, sc, ltime, sort_by_sample,
                OE)
        else:
            line = "make_html.py -f {}/output.mrd -k {}/{} -p {}/precursors.coords -s {}/survey.csv -c -e -r {}Rfam_for_moR.fa -v {} -y {}  {} {}".format(
                _dir, dir_tmp, file_mature_ref_this_species, dir_tmp, _dir,
                scripts, sc, ltime, sort_by_sample, OE)
    else:
        if options.get('-q'):
            line = "make_html.py -f {}/output.mrd -p {}/precursors.coords -s {}/survey.csv -c -e -q {}  -x {} -r {}Rfam_for_moR.fa -v {} -y {} {} {}".format(
                _dir, dir_tmp, _dir, options.get('-q'), xopt, scripts, sc,
                ltime, sort_by_sample, OE)
        else:
            line = "make_html.py -f {}/output.mrd -p {}/precursors.coords -v {} -s {}/survey.csv -c -e -r {}Rfam_for_moR.fa -y {} {} {}".format(
                _dir, dir_tmp, sc, _dir, scripts, ltime, sort_by_sample, OE)

    if options.get('-t'):
        line += " -t {}".format(options.get('-t'))

    dopt = ""
    if switch_given('-d'):
        dopt = "-d"

    cmd = '{} -V {} {}\n\n'.format(line, version, dopt)
    print_stderr(cmd)
    os.system(cmd)

    end()
Exemple #20
0
if __name__ == '__main__':
    # measuring times
    (sTime, eTime, stime, etime, sTimeG, eTimeG, stimeG,
     etimeG) = (None, None, None, None, None, None, None, None)
    (second, minute, hour, dayOfMonth, month, yearOffset, dayOfWeek, dayOfYear,
     daylightSavings) = (None, None, None, None, None, None, None, None, None)
    (second, minute, hour, dayOfMonth, month, yearOffset, dayOfWeek, dayOfYear,
     daylightSavings) = localtime()

    if re.search(r'^\d$', str(second)):
        second = "0{}".format(second)

    sTimeG = "{}:{}:{}".format(hour, minute, second)
    stimeG = int(time.time())

    pprint("moRNA Finder started at {}\n\n\n".format(sTimeG))

    ctime = int(time.time())
    ltime = myTime(ctime)

    scripts = os.popen('which moR.py').read()
    # scripts = re.sub(r'moR.py', '', scripts, count=1)
    # scripts = re.sub(r'\s+', '', scripts)
    scripts = os.path.dirname(scripts) + '/'

    pprint('#Starting moRNA Finder\n')
    print_stderr('#Starting moRNA Finder\n{} {}\n\n'.format(
        sys.argv[0], ' '.join(sys.argv[1:])))
    print_stderr("moRNA Finder started at {}\n\n\n".format(sTimeG))

    test_first_argument()