def check_already_decontaminated(sample, fragment, PCR):
    """Check from the summary and output file whether decontamination has been done"""
    import os
    from hivwholeseq.utils.mapping import get_number_reads

    if (fragment == "F4") and (sample.name in maj_contnames):
        return True

    fn_sum = get_decontaminate_summary_filename(sample.patient, sample.name, fragment, PCR=PCR)
    fn_in = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=False)
    fn_out = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=True)

    if not all(map(os.path.isfile, [fn_sum, fn_in, fn_out])):
        print sample.patient, sample.name, fragment, PCR
        return False

    n_reads_in = get_number_reads(fn_in)
    n_reads_out = get_number_reads(fn_out)
    (n_reads_good, n_reads_cont) = get_number_reads_summary(fn_sum)

    if n_reads_out != n_reads_good:
        print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont
        return False

    if n_reads_good < 0.5 * n_reads_in:
        print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont
        return False

    return True
Esempio n. 2
0
def check_already_decontaminated(sample, fragment, PCR):
    '''Check from the summary and output file whether decontamination has been done'''
    import os
    from hivwholeseq.utils.mapping import get_number_reads

    if (fragment == 'F4') and (sample.name in maj_contnames):
        return True

    fn_sum = get_decontaminate_summary_filename(sample.patient,
                                                sample.name,
                                                fragment,
                                                PCR=PCR)
    fn_in = sample.get_mapped_filtered_filename(fragment,
                                                PCR=PCR,
                                                decontaminated=False)
    fn_out = sample.get_mapped_filtered_filename(fragment,
                                                 PCR=PCR,
                                                 decontaminated=True)

    if not all(map(os.path.isfile, [fn_sum, fn_in, fn_out])):
        print sample.patient, sample.name, fragment, PCR
        return False

    n_reads_in = get_number_reads(fn_in)
    n_reads_out = get_number_reads(fn_out)
    (n_reads_good, n_reads_cont) = get_number_reads_summary(fn_sum)

    if n_reads_out != n_reads_good:
        print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont
        return False

    if n_reads_good < 0.5 * n_reads_in:
        print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont
        return False

    return True
                    if samples.loc[contname].patient == pname:
                        del consensi_sample[contname]

                print samplename,
                if VERBOSE >= 2:
                    print ""
                (n_good, n_cont) = filter_contamination(
                    bamfilename, bamfilename_out, consensi_sample, samplename, VERBOSE=VERBOSE, maxreads=maxreads
                )

                if VERBOSE:
                    print "good:", n_good, "contaminated:", n_cont

                if summary:
                    sfn = get_decontaminate_summary_filename(pname, samplename, fragment, PCR=PCR_sample)
                    with open(sfn, "w") as f:
                        f.write(
                            "Call: python decontaminate_reads.py"
                            + " --samples "
                            + samplename
                            + " --fragments "
                            + fragment
                            + " --verbose "
                            + str(VERBOSE)
                        )
                        if maxreads != -1:
                            f.write(" --maxreads " + str(maxreads))
                        f.write("\n")
                        f.write("Good: " + str(n_good) + "\n")
                        f.write("Contaminated: " + str(sum(n_cont.itervalues())) + "\n")
Esempio n. 4
0
                print samplename,
                if VERBOSE >= 2:
                    print ''
                (n_good, n_cont) = filter_contamination(bamfilename,
                                                        bamfilename_out,
                                                        consensi_sample,
                                                        samplename,
                                                        VERBOSE=VERBOSE,
                                                        maxreads=maxreads)

                if VERBOSE:
                    print 'good:', n_good, 'contaminated:', n_cont

                if summary:
                    sfn = get_decontaminate_summary_filename(pname,
                                                             samplename,
                                                             fragment,
                                                             PCR=PCR_sample)
                    with open(sfn, 'w') as f:
                        f.write('Call: python decontaminate_reads.py'+\
                                ' --samples '+samplename+\
                                ' --fragments '+fragment+\
                                ' --verbose '+str(VERBOSE))
                        if maxreads != -1:
                            f.write(' --maxreads ' + str(maxreads))
                        f.write('\n')
                        f.write('Good: ' + str(n_good) + '\n')
                        f.write('Contaminated: ' +
                                str(sum(n_cont.itervalues())) + '\n')
                        f.write('Contamination sources:\n')
                        for contname, n_conti in n_cont.iteritems():
                            f.write('{:<20s}'.format(contname) + ' ' +
    samples = load_samples_sequenced()
    samplenames = refnames + samples.index.tolist()

    for fragment in fragments:
        if VERBOSE >= 1:
            print fragment

        mat = np.ma.masked_all((len(samplenames), len(samplenames)))

        for (samplename, sample) in samples.iterrows():
            if VERBOSE >= 2:
                print samplename,

            isp = samplenames.index(samplename)
            sample = SamplePat(sample)
            fn = get_decontaminate_summary_filename(sample.patient, samplename, fragment,
                                                    PCR=1)
            
            #FIXME: still finishing this one
            if samplename == '29184':
                continue

            if not os.path.isfile(fn):
                if VERBOSE >= 2:
                    print 'summary file not found'
                continue

            n_good, n_cont, n_cont_dict = get_number_reads_summary(fn, details=True)

            if VERBOSE >= 2:
                print n_good, n_cont