Exemple #1
0
def clean_se_run(settings):
	'''
	'''
	# 1. Reads filter illumina kmers
	# 2. Filter data
	# 3. Save
	print "Load library for key=", settings["k"]
	with open(settings["pickle_libraries_file"]) as fh:
		library = cPickle.load(fh)
	library = library[settings["k"]]
	kmers =set(library.keys())
	for kmer in library.keys():
		kmers.add(get_revcomp(kmer))
	with open(settings["dat_libraries_file"], "w") as fh:
		for kmer in kmers:
			fh.write("%s\t-\n" % kmer)

	prefix = settings["prefix"]
	verbose = settings["verbose"]
	adapters_file = settings["dat_libraries_file"]
	fastq1_file = "%s.fastq" % prefix
	fastq1ok_file = "%s.ok.fastq" % prefix
	fastq_bad_file  = "%s.bad.fastq" % prefix
	clean_single_read_data(fastq1_file, fastq1ok_file, fastq_bad_file, verbose=verbose, adapters_file=adapters_file,
			cutoff=settings["cutoff"], polyG_cutoff=settings["polyGcutoff"]
		)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#@created: 10.10.2013
#@author: Aleksey Komissarov
#@contact: [email protected]

import sys
from PyBioSnippets.hiseq.fastq_tools import clean_single_read_data
import argparse

def build_arg_parser():
	'''Build the command-line parser for the single-end read cleaning script.

	Options:
	  -p/--prefix    required; the script derives "<prefix>.fastq",
	                 "<prefix>.ok.fastq" and "<prefix>.bad.fastq" from it.
	  -v/--verbose   optional; a bare "-v" yields True, "-v VALUE" yields the
	                 string VALUE (as before), absent yields False.
	  -G/--polyG     optional integer, default 23.
	  -c/--cutoff    optional integer, default 50.
	  -a/--adapters  optional path to a file with adapters, default None.

	Returns the configured argparse.ArgumentParser.
	'''
	parser = argparse.ArgumentParser(description='Check presence of adapter kmers.')
	parser.add_argument('-p', '--prefix', help='SE prefix', required=True)
	# nargs='?' with const=True lets "-v" work as a plain switch; previously the
	# option demanded a value, so a bare "-v" aborted with a usage error.
	parser.add_argument('-v', '--verbose', help='Verbose', required=False,
			nargs='?', const=True, default=False)
	# type=int makes argparse reject non-numeric values with a usage message
	# instead of the script dying later with a ValueError traceback.
	parser.add_argument('-G', '--polyG', help='Length of polyG', required=False,
			type=int, default=23)
	parser.add_argument('-c', '--cutoff', help='Length cutoff', required=False,
			type=int, default=50)
	parser.add_argument('-a', '--adapters', help='File with adapters', required=False,
			default=None)
	return parser


if __name__ == '__main__':

	args = vars(build_arg_parser().parse_args())

	prefix = args["prefix"]
	verbose = args["verbose"]
	cutoff = args["cutoff"]
	polyG_cutoff = args["polyG"]
	adapters_file = args["adapters"]
	# All three FASTQ paths share the user-supplied prefix.
	fastq1_file = "%s.fastq" % prefix
	fastq1ok_file = "%s.ok.fastq" % prefix
	fastq_bad_file = "%s.bad.fastq" % prefix
	clean_single_read_data(
		fastq1_file,
		fastq1ok_file,
		fastq_bad_file,
		verbose=verbose,
		adapters_file=adapters_file,
		cutoff=cutoff,
		polyG_cutoff=polyG_cutoff,
	)