Beispiel #1
0
def _profile_load_files_shared(max_read_size, min_read_no, min_read_size,
                               seq_file_list):
    """
    Load one or more seq files into a single SRNASeq and build its name.

    A list holding exactly one path is loaded with ``load_seq_file``; any
    other list is passed whole to ``load_seq_file_arg_list``. The returned
    name is ``ah.single_file_output`` of every path, joined with "_" in
    list order.

    :param max_read_size: exclude reads with lengths > max_read_size (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param min_read_size: exclude reads with lengths < min_read_size (int)
    :param seq_file_list: [path/to/seq/, path/to/seq2,...] (list(str))
    :return: seq (SRNASeq), seq_name (joined per-file names)
    """
    banner = colored("-----------------LOADING SEQUENCES----------------",
                     'green')
    print(banner)

    # The three read filters are always forwarded together, in this order.
    filters = (max_read_size, min_read_no, min_read_size)
    seq = SRNASeq()
    if len(seq_file_list) != 1:
        seq.load_seq_file_arg_list(seq_file_list, *filters)
    else:
        seq.load_seq_file(seq_file_list[0], *filters)

    # Name = first file's name, then "_<name>" for each remaining file.
    first_path, other_paths = seq_file_list[0], seq_file_list[1:]
    seq_name = ah.single_file_output(first_path)
    for extra_path in other_paths:
        seq_name += "_{0}".format(ah.single_file_output(extra_path))
    return seq, seq_name
Beispiel #2
0
def _cdp_output(counts_by_ref, file_fig, file_name, onscreen, no_csv, seq_name_1,
                seq_name_2, ref_file, nt, pub, bok):
    """
    Organise csv or pdf output for CDP analysis.

    A plot is produced through ``pr.cdp_plot`` when either ``file_fig`` or
    ``onscreen`` is truthy. A csv is written through ``wtf.cdp_output`` when
    ``no_csv`` is truthy.

    NOTE(review): the csv is written when ``no_csv`` is truthy, which reads
    as inverted relative to the name - presumably the caller passes an
    already-negated flag; confirm against the caller.

    :param counts_by_ref: alignment counts handed to the plot and csv writers
    :param file_fig: truthy to request a plot; forwarded to pr.cdp_plot
    :param file_name: plot file name; "auto" generates one via
                      ah.cdp_file_output with a "pdf" extension argument
    :param onscreen: truthy to request a plot; forwarded to pr.cdp_plot
    :param no_csv: csv output is written when this is truthy (see note)
    :param seq_name_1: name of the first sequence set
    :param seq_name_2: name of the second sequence set
    :param ref_file: reference file path; reduced with ah.single_file_output
    :param nt: read length, forwarded to the file-name helper and the plot
    :param pub: forwarded unchanged to pr.cdp_plot
    :param bok: forwarded unchanged to pr.cdp_plot
    """
    ref_name = ah.single_file_output(ref_file)

    wants_plot = file_fig or onscreen
    if wants_plot:
        if file_name == "auto":
            file_name = ah.cdp_file_output(
                seq_name_1, seq_name_2, ref_name, nt, "pdf")
        pr.cdp_plot(
            counts_by_ref, seq_name_1, seq_name_2, nt, onscreen, file_fig,
            file_name, pub, bok)

    if not no_csv:
        return

    out_csv_name = ah.cdp_file_output(
        seq_name_1, seq_name_2, ref_name, nt, "csv")
    wtf.cdp_output(counts_by_ref, seq_name_1, seq_name_2, out_csv_name)
Beispiel #3
0
def _cdp_load_files_shared(max_read_size, min_read_no, min_read_size,
                           seq_file_list_1, seq_file_list_2):
    """
    Load two sets of seq files and build a display name for each set.

    Each set is loaded into its own SRNASeq: a list holding exactly one path
    goes through ``load_seq_file``, any other list is passed whole to
    ``load_seq_file_arg_list``. Both sets are loaded before either name is
    built. A set's name is ``ah.single_file_output`` of every path in it,
    joined with "_" in list order.

    :param max_read_size: exclude reads with lengths > max_read_size (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param min_read_size: exclude reads with lengths < min_read_size (int)
    :param seq_file_list_1: [path/to/seq/, path/to/seq2,...] (list(str))
    :param seq_file_list_2: [path/to/seq/, path/to/seq2,...] (list(str))
    :return: seq_1 (SRNASeq), seq_2 (SRNASeq), seq_name_1 (str),
             seq_name_2 (str)
    """

    def _load(seq_files):
        # Build one SRNASeq from a list of paths using the shared filters.
        loaded = SRNASeq()
        if len(seq_files) == 1:
            loaded.load_seq_file(seq_files[0], max_read_size, min_read_no,
                                 min_read_size)
        else:
            loaded.load_seq_file_arg_list(seq_files, max_read_size,
                                          min_read_no, min_read_size)
        return loaded

    def _name(seq_files):
        # First file's name, then "_<name>" for each remaining file.
        joined = ah.single_file_output(seq_files[0])
        for extra_path in seq_files[1:]:
            joined += "_{0}".format(ah.single_file_output(extra_path))
        return joined

    print(
        colored("-----------------LOADING SEQUENCES----------------", 'green'))
    seq_1 = _load(seq_file_list_1)
    seq_2 = _load(seq_file_list_2)
    seq_name_1 = _name(seq_file_list_1)
    seq_name_2 = _name(seq_file_list_2)
    return seq_1, seq_2, seq_name_1, seq_name_2
Beispiel #4
0
def _load_ref_shared(ref_file):
    """
    Shared function for loading a reference file holding a single sequence.

    If the loaded reference reports more than one sequence, a message is
    printed and the process exits with a non-zero status.

    :param ref_file: path/to/reference (str)
    :return: ref_output (result of ah.single_file_output(ref_file)),
             single_ref (the entry of the loaded reference, or "" when
             ref.headers() yields nothing)
    :raises SystemExit: when the reference holds more than one sequence
    """
    ref = RefSeq()
    ref.load_ref_file(ref_file)
    if len(ref) > 1:
        print("\nMultiple reference sequences in file. Exiting.\n")
        # Exit with a failure status: a bare sys.exit() reports success (0),
        # which hides this error from shells and calling scripts.
        sys.exit(1)
    ref_output = ah.single_file_output(ref_file)
    single_ref = ""
    # At most one header remains after the check above, so this picks that
    # single entry (or leaves "" if there is none).
    for header in ref.headers():
        single_ref = ref[header]
    print(colored("------------------ALIGNING READS------------------\n", 'green'))
    return ref_output, single_ref
Beispiel #5
0
def _load_ref_shared(ref_file):
    """
    Shared function for loading a reference file holding a single sequence.

    If the loaded reference reports more than one sequence, a message is
    printed and the process exits with a non-zero status.

    :param ref_file: path/to/reference (str)
    :return: ref_output (result of ah.single_file_output(ref_file)),
             single_ref (the entry of the loaded reference, or "" when
             ref.headers() yields nothing)
    :raises SystemExit: when the reference holds more than one sequence
    """
    ref = RefSeq()
    ref.load_ref_file(ref_file)
    if len(ref) > 1:
        print("\nMultiple reference sequences in file. Exiting.\n")
        # Exit with a failure status: a bare sys.exit() reports success (0),
        # which hides this error from shells and calling scripts.
        sys.exit(1)
    ref_output = ah.single_file_output(ref_file)
    single_ref = ""
    # At most one header remains after the check above, so this picks that
    # single entry (or leaves "" if there is none).
    for header in ref.headers():
        single_ref = ref[header]
    print(
        colored("------------------ALIGNING READS------------------\n",
                'green'))
    return ref_output, single_ref
Beispiel #6
0
def _cdp_output(counts_by_ref, file_fig, file_name, onscreen, no_csv,
                seq_name_1, seq_name_2, ref_file, nt, pub):
    """
    Organise csv or pdf output for CDP analysis.

    A plot is produced through ``pr.cdp_plot`` when either ``file_fig`` or
    ``onscreen`` is truthy. A csv is written through ``wtf.cdp_output`` when
    ``no_csv`` is truthy.

    NOTE(review): the csv is written when ``no_csv`` is truthy, which reads
    as inverted relative to the name - presumably the caller passes an
    already-negated flag; confirm against the caller.

    :param counts_by_ref: alignment counts handed to the plot and csv writers
    :param file_fig: truthy to request a plot; forwarded to pr.cdp_plot
    :param file_name: plot file name; "auto" generates one via
                      ah.cdp_file_output with a "pdf" extension argument
    :param onscreen: truthy to request a plot; forwarded to pr.cdp_plot
    :param no_csv: csv output is written when this is truthy (see note)
    :param seq_name_1: name of the first sequence set
    :param seq_name_2: name of the second sequence set
    :param ref_file: reference file path; reduced with ah.single_file_output
    :param nt: read length, forwarded to the file-name helper and the plot
    :param pub: forwarded unchanged to pr.cdp_plot
    """
    ref_name = ah.single_file_output(ref_file)
    # Everything the file-name helper needs apart from the extension.
    name_parts = (seq_name_1, seq_name_2, ref_name, nt)

    plot_requested = file_fig or onscreen
    if plot_requested:
        if file_name == "auto":
            file_name = ah.cdp_file_output(*name_parts + ("pdf",))
        pr.cdp_plot(
            counts_by_ref, seq_name_1, seq_name_2, nt, onscreen, file_fig,
            file_name, pub)

    if not no_csv:
        return

    out_csv_name = ah.cdp_file_output(*name_parts + ("csv",))
    wtf.cdp_output(counts_by_ref, seq_name_1, seq_name_2, out_csv_name)
Beispiel #7
0
def reads_aligned_per_seq(seq_file_list,
                          ref_file,
                          nt,
                          split,
                          min_read_len=18,
                          max_read_len=32,
                          min_read_no=1,
                          processes=4):
    """
    Get RPMR alignments for each sequence file in the list - no plot.

    Calculates normalised reads aligned to multiple reference sequences for
    each seq file individually. Outputs a csv only (no scatter plot).

    Every file is loaded into its own SRNASeq; the loaded objects and their
    names (from ``ah.single_file_output``) are kept in two parallel lists
    and handed to the ``cdp`` module.

    NOTE(review): a truthy ``split`` dispatches to
    ``cdp.cdp_no_split_single`` and a falsy one to ``cdp.cdp_split_single``.
    This looks inverted relative to the parameter name - presumably callers
    pass a "no split" flag here; confirm before changing.

    :param seq_file_list: [path/to/seq/, path/to/seq2,...] (list(str))
    :param ref_file: path/to/reference (str)
    :param nt: read length to align (int)
    :param split: selects the cdp routine (bool) - see the review note
    :param min_read_len: exclude reads with lengths < min_read_len (int)
    :param max_read_len: exclude reads with lengths > max_read_len (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param processes: no of processes to generate at a time i.e. threads (int)
    """
    print(
        colored("-----------------LOADING SEQUENCES----------------", 'green'))

    loaded_seqs = []  # SRNASeq objects, one per input file
    loaded_names = []  # matching names, same order as loaded_seqs
    for seq_path in seq_file_list:
        one_seq = SRNASeq()
        one_seq.load_seq_file(seq_path, max_read_len, min_read_no,
                              min_read_len)
        loaded_seqs.append(one_seq)
        loaded_names.append(ah.single_file_output(seq_path))

    if not split:
        cdp.cdp_split_single(loaded_seqs, loaded_names, ref_file, nt,
                             processes)
    else:
        cdp.cdp_no_split_single(loaded_seqs, loaded_names, ref_file, nt,
                                processes)