def getFromArrayByKey(cf):
	"""Copy the entries of 'in_array' whose key is listed in the 'keys' file.

	The 'keys' input is parsed as delimited text with the csv module, using
	the 'delimiter' parameter (the literal value 'tab' selects a tab
	character). The first row is skipped as a header. Empty rows are
	ignored, as are rows whose first field starts with the 'commentchar'
	parameter when that parameter is non-empty. The field at index 'keycol'
	of every remaining row is collected as a key.

	Every (key, value) pair of 'in_array' whose key was collected is written
	to 'out_array'.

	Returns constants.OK on success, or constants.GENERIC_ERROR when a row
	has no field at index 'keycol'.
	"""
	commentchar = cf.get_parameter('commentchar', 'string')
	delimiter = cf.get_parameter('delimiter', 'string')
	if delimiter == 'tab':
		delimiter = '\t'
	keycol = cf.get_parameter('keycol', 'int')
	keysfile = cf.get_input('keys')
	keys = []
	# 'with' closes the keys file on every exit path, including the early
	# error return below.
	with open(keysfile, 'U') as keysfh:
		reader = csv.reader(keysfh, delimiter=delimiter, quoting=csv.QUOTE_NONE)
		# Skip the header row; the default keeps an empty file from raising
		# StopIteration.
		next(reader, None)
		for row in reader:
			if not row or (commentchar != "" and row[0].startswith(commentchar)):
				continue
			# keycol is used as a 0-based index, so keycol == len(row) is
			# already out of range.
			if keycol >= len(row):
				cf.write_error("Index %s out of bounds in row %s" % (keycol, row))
				return constants.GENERIC_ERROR
			keys.append(row[keycol])
	cf.write_log(str(keys))
	# The list is kept for the log line above; a set gives constant-time
	# membership tests in the loop below.
	wanted = set(keys)
	inarray = get_array(cf, 'in_array')
	outarray = AndurilOutputArray(cf, 'out_array')
	for key, value in inarray:
		cf.write_log("Key: %s, Value: %s" % (key, value))
		if key in wanted:
			outarray.write(key, value)
	return constants.OK
def separatePairsAndSingles(cf):
	"""Split the records of an array of FASTQ files into three outputs.

	For each (accession, fastqfile) entry of the 'fastqfiles' array,
	isPaired(srafetchxml, accession) decides how the file is handled:

	* paired: records are consumed two at a time; the first of each pair is
	  written to the 'left' output and the second to the 'right' output.
	* otherwise: every record is written to the 'single' output.

	A paired file with an odd number of records has its last record dropped
	(it has no mate); this is reported in the log.

	Returns constants.OK.
	"""
	fastqfiles = get_array(cf, 'fastqfiles')
	srafetchxml = cf.get_input('srafetchxml')
	# The context managers close all three outputs even if parsing fails.
	with open(cf.get_output('left'), 'w') as leftfh, \
			open(cf.get_output('right'), 'w') as rightfh, \
			open(cf.get_output('single'), 'w') as singlefh:
		for accession, fastqfile in fastqfiles:
			fqp = FastqParser()
			# Close each input as soon as it has been consumed instead of
			# leaking one handle per run.
			with open(fastqfile, 'U') as fastqfh:
				if isPaired(srafetchxml, accession):
					#paired end run
					records = fqp.parse(fastqfh)
					while True:
						try:
							pe1 = next(records)
						except StopIteration:
							break
						try:
							pe2 = next(records)
						except StopIteration:
							# pe1 has no mate, so it cannot go to left/right.
							cf.write_log("Odd number of records in paired run %s (%s); dropping the last record." % (accession, fastqfile))
							break
						leftfh.write(str(pe1) + '\n')
						rightfh.write(str(pe2) + '\n')
				else:
					#single end run
					for rec in fqp.parse(fastqfh):
						singlefh.write(str(rec) + '\n')
	return constants.OK
Exemple #3
0
def fasta_merge(cf):
    """Merge an array of fastafiles into the 'output' file.

    Every record yielded by fasta_itr() for each file of the 'fastafiles'
    array is written, in array order, as str(record) followed by a newline.

    Returns constants.OK.
    """
    # 'with' closes the output even when reading or writing a record fails.
    with open(cf.get_output("output"), "w") as outfh:
        fastafiles = get_array(cf, "fastafiles")
        cf.write_log(str(fastafiles))
        for _key, fastafile in fastafiles:
            for rec in fasta_itr(fastafile):
                outfh.write(str(rec) + "\n")
    return constants.OK
Exemple #4
0
def fastq_merge(cf):
	"""Merge an array of fastqfiles into the 'output' file.

	Every record parsed by FastqParser from each file of the 'in_array'
	array is written, in array order, as str(record) followed by a newline.

	Returns constants.OK.
	"""
	# 'with' closes the output even when parsing or writing a record fails.
	with open(cf.get_output('output'), 'w') as outfh:
		fastqfiles = get_array(cf, 'in_array')
		cf.write_log(str(fastqfiles))
		fqp = FastqParser()
		for _key, fastqfile in fastqfiles:
			# Close each input once consumed instead of leaking one handle
			# per merged file.
			with open(fastqfile, 'U') as fastqfh:
				for rec in fqp.parse(fastqfh):
					outfh.write(str(rec) + '\n')
	return constants.OK
Exemple #5
0
def sffmerge(cf):
	"""Run the external 'sfffile' command over every file of 'in_array'.

	The command line is 'sfffile -o <output> <file1> <file2> ...', with the
	files taken in array order (presumably merging them into the 'output'
	file; confirm against the sfffile documentation).

	Returns constants.OK when the command exits with status 0, or
	constants.GENERIC_ERROR (after logging the failure) when it exits with a
	non-zero status.
	"""
	inarray = get_array(cf, 'in_array')
	outputfile = cf.get_output('output')
	params = ['-o', outputfile]
	params.extend(inputfile for _key, inputfile in inarray)
	try:
		subprocess.check_call(['sfffile'] + params)
	except subprocess.CalledProcessError as e:
		cf.write_log("Error running sfffile.")
		cf.write_log("Error: %s" % str(e))
		return constants.GENERIC_ERROR
	# Report success explicitly, like the sibling component functions,
	# instead of falling off the end and returning None.
	return constants.OK
def newbler(cf):
	"""Assemble the Newbler argument list and hand it to runNewblerWithRetry.

	The arguments are, in order: '-o' with the 'assemblydir' output; '-vt'
	with the 'vectortrimming' input when that input is set and names an
	existing, non-empty file; '-large' when the 'large' parameter is true;
	'-cpu' with the 'threads' parameter followed by the fixed flags '-cdna',
	'-m', '-urt' and '-force'; and finally every file of the 'inputfiles'
	array.

	Returns whatever runNewblerWithRetry returns; it is given one more
	attempt than the 'retries' parameter.
	"""
	args = ['-o', cf.get_output('assemblydir')]

	trimfile = cf.get_input('vectortrimming')
	# Only pass the trimming file along when it exists and has content.
	use_trimfile = bool(trimfile) and os.path.isfile(trimfile) and os.path.getsize(trimfile) > 0
	if use_trimfile:
		args.extend(['-vt', trimfile])

	if cf.get_parameter('large', 'boolean'):
		args.extend(['-large'])

	threads = str(cf.get_parameter('threads'))
	args.extend(['-cpu', threads, '-cdna', '-m', '-urt', '-force'])

	args.extend(path for _key, path in get_array(cf, 'inputfiles'))

	cf.write_log("Params: %s" % str(args))
	# The first run plus the configured number of retries.
	attempts = 1 + cf.get_parameter('retries', 'int')
	return runNewblerWithRetry(cf, args, attempts)