Example #1
0
def main():
    """ Run the MicallDD test over a (possibly reduced) set of read indexes.

    Builds a MicallDD from 'censored1.fastq' and 'bad_cycles.csv', asks it to
    reduce the full range of read indexes with ddmin() while run_test is
    switched on, and then calls its _test() method on the resulting indexes
    with the debug file prefix 'micall_debug'.

    Any exception raised after logging is set up is logged as 'Failed.'
    together with its traceback, and is not re-raised.
    """
    logger = init_logging_console_only(logging.INFO)
    try:
        logger.info('Starting.')
        fastq_name = 'censored1.fastq'
        dd = MicallDD(fastq_name, 'bad_cycles.csv')
        all_indexes = range(len(dd.reads))

        # Flip to False to skip the reduction and test every read as-is.
        run_test = True
        min_indexes = dd.ddmin(all_indexes) if run_test else all_indexes

        dd._test(min_indexes, debug_file_prefix='micall_debug')
        # Disabled: dd.write_simple_fastq(fastq_name + '_min.fastq', min_indexes)
        logger.info('Done.')
    except Exception as ex:
        logger.error('Failed.', exc_info=ex)
Example #2
0
from micall.utils.externals import Bowtie2, Bowtie2Build, LineCounter
from micall.utils.translation import reverse_and_complement

# Tunable thresholds for this module.
CONSENSUS_Q_CUTOFF = 20  # Min Q for base to contribute to conseq (pileup2conseq)
MIN_MAPPING_EFFICIENCY = 0.95  # Fraction of fastq reads mapped needed
MAX_REMAPS = 3  # Number of remapping attempts if mapping efficiency unsatisfied

# SAM file format: names for the eleven columns of a SAM record listed here.
fieldnames = [
    'qname', 'flag', 'rname', 'pos', 'mapq', 'cigar', 'rnext', 'pnext', 'tlen',
    'seq', 'qual'
]

# Matches one CIGAR token: one or more digits followed by a single operation
# character from the set MIDNSHPX=.
cigar_re = re.compile('[0-9]+[MIDNSHPX=]')  # CIGAR token

# Module-level logger; note that it is created at import time, at DEBUG level.
logger = miseq_logging.init_logging_console_only(logging.DEBUG)
# Matches a '+' or '-' sign followed by one or more digits.
# NOTE(review): presumably the indel length marker in pileup output - confirm
# against the code that uses it.
indel_re = re.compile('[+-][0-9]+')
# Shared LineCounter instance (imported from micall.utils.externals); how it
# is used is not visible in this part of the file.
line_counter = LineCounter()


def is_first_read(flag):
    """
    Decode the bitwise flag taken from a SAM field.

    The flag may be an int or anything int() accepts, such as the string read
    from the file. Returns True when the 0x40 bit (first segment) is set,
    meaning the read is the first read in a pair, and False otherwise.
    """
    first_segment_mask = 0x40
    return bool(int(flag) & first_segment_mask)


def is_unmapped_read(flag):
    """
Example #3
0
             simple_prefix,
             pssm,
             ruby_script,
             delete_results=False)
        if not txtfilename.endswith('.txt'):
            with open(simple_prefix + '.txt', 'w') as simplefile:
                for line in simple_remap_lines:
                    simplefile.write(line)


if __name__ == '__main__':
    # Command line: a working folder, the Ruby G2P script, and an optional
    # glob pattern that selects which files in the folder get compared.
    parser = argparse.ArgumentParser(
        description='Find the simplest test failure by trimming SAM files.')
    parser.add_argument(
        'workdir',
        help='path to folder holding SAM files')
    parser.add_argument(
        'ruby_script',
        help='path to Ruby version of G2P')
    parser.add_argument(
        '--pattern',
        default='*.remap.csv',
        help='File name pattern to match SAM files')
    args = parser.parse_args()

    logger = init_logging_console_only(logging.INFO)

    # The scoring matrix files are looked up relative to the current folder.
    pssm = Pssm(path_to_lookup='../g2p/g2p_fpr.txt',
                path_to_matrix='../g2p/g2p.matrix')

    # Visit the matching files in sorted order, logging each base name
    # before comparing it.
    search_pattern = os.path.join(args.workdir, args.pattern)
    matching_files = glob.glob(search_pattern)
    matching_files.sort()
    for txtfilename in matching_files:
        logger.info(os.path.basename(txtfilename))
        compare_conseqs(txtfilename, args.ruby_script, pssm)

    logger.info('Done.')
Example #4
0
    parser.add_argument('coord_ins_csv',
                        type=argparse.FileType('w'),
                        help='CSV containing insertions relative to coordinate reference')
    parser.add_argument('conseq_csv',
                        type=argparse.FileType('w'),
                        help='CSV containing consensus sequences')
    parser.add_argument('failed_align_csv',
                        type=argparse.FileType('w'),
                        help='CSV containing any consensus that failed to align')
    parser.add_argument('nuc_variants_csv',
                        type=argparse.FileType('w'),
                        help='CSV containing top nucleotide variants')

    return parser.parse_args()

# Module-level logger; note that it is created at import time, at DEBUG level.
logger = miseq_logging.init_logging_console_only(logging.DEBUG)

# Sentinel string.
# NOTE(review): presumably stands for the 'maximum' consensus mixture cutoff
# alongside numeric cutoffs - confirm against the code that uses it.
MAX_CUTOFF = 'MAX'


class SequenceReport(object):
    """ Hold the data for several reports related to a sample's genetic sequence.

    To use a report object, read a group of aligned reads that mapped to a
    single region, and then write out all the reports for that region.
    """
    def __init__(self,
                 insert_writer,
                 projects,
                 conseq_mixture_cutoffs):
        """ Create an object instance.
Example #5
0
        print 'Simplifying sample {}'.format(txtfilename)
        reads = defaultdict(list)
        read_fastq(txtfilename, reads)
        read_count = len(reads)
        read_fastq(get_reverse_filename(txtfilename), reads)
        added_count = len(reads) - read_count
        if added_count > 0:
            raise RuntimeError('Found {} new reads.'.format(added_count))
        reads = reads.values()
        simple_filename = txtfilename.replace('censored1.fastq',
                                              'simple_censored1.fastq')
        simple_fastq_lines = ddmin(reads, simple_filename)
        write_simple_fastq(simple_filename, simple_fastq_lines)

# Script entry point: set up console logging, then process FASTQ files.
if __name__ == '__main__':
    logger = init_logging_console_only(logging.INFO)
    # NOTE(review): the next two lines run test_file() on one hard-coded,
    # machine-specific path and then exit, so the argument parsing below is
    # never reached as written. This looks like leftover debugging code -
    # confirm whether it should be removed.
    test_file('/home/don/git/MiCall/micall/tests/working/61515A-HCV_S1_uncensored1.fastq')
    exit()

    # Command line: a working folder plus an optional glob pattern selecting
    # which files in that folder to process.
    parser = argparse.ArgumentParser(
        description='Find the simplest test failure by trimming FASTQ files.')

    parser.add_argument('workdir', help='path to folder holding FASTQ files')
    parser.add_argument('--pattern',
                        default='*censored1.fastq',
                        help='File name pattern to match FASTQ files')

    args = parser.parse_args()

    # Collect the matching file names and put them in sorted order.
    filenames = glob.glob(os.path.join(args.workdir, args.pattern))
    filenames.sort()