def getFromArrayByKey(cf):
    """Copy the entries of 'in_array' whose key is listed in the 'keys' file.

    The 'keys' input is read as a delimited text file using the 'delimiter'
    parameter (the literal value 'tab' selects a tab character). The first
    row is treated as a header and skipped. Empty rows, and rows whose first
    field starts with the 'commentchar' parameter (when it is non-empty), are
    ignored. The field at zero-based index 'keycol' of every remaining row is
    collected as a key.

    Every (key, value) pair of 'in_array' whose key was collected is written
    to 'out_array'.

    Returns:
        constants.OK on success, or constants.GENERIC_ERROR when a row of the
        keys file has no field at index 'keycol'.
    """
    commentchar = cf.get_parameter('commentchar', 'string')
    delimiter = cf.get_parameter('delimiter', 'string')
    if delimiter == 'tab':
        delimiter = '\t'
    keycol = cf.get_parameter('keycol', 'int')
    keysfile = cf.get_input('keys')

    keys = []
    # The context manager closes the keys file on every exit path, including
    # the early error return below.
    with open(keysfile, 'U') as keysfh:
        reader = csv.reader(keysfh, delimiter=delimiter,
                            quoting=csv.QUOTE_NONE)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row or (commentchar and row[0].startswith(commentchar)):
                continue
            # Valid indices are 0 .. len(row) - 1, so keycol == len(row) is
            # already out of bounds.
            if keycol >= len(row):
                cf.write_error("Index %s out of bounds in row %s"
                               % (keycol, row))
                return constants.GENERIC_ERROR
            keys.append(row[keycol])
    cf.write_log(str(keys))

    # Set lookup avoids rescanning the key list for every array entry.
    wanted = set(keys)
    inarray = get_array(cf, 'in_array')
    outarray = AndurilOutputArray(cf, 'out_array')
    for key, value in inarray:
        cf.write_log("Key: %s, Value: %s" % (key, value))
        if key in wanted:
            outarray.write(key, value)
    return constants.OK
def separatePairsAndSingles(cf):
    """Split an array of FASTQ runs into left, right and single read files.

    For every (accession, fastqfile) entry of the 'fastqfiles' array,
    isPaired() is consulted with the 'srafetchxml' input:

    * paired run: consecutive records are taken two at a time; the first is
      written to the 'left' output and the second to the 'right' output. A
      trailing record without a mate is dropped.
    * otherwise: every record is written to the 'single' output.

    Each record is written as str(record) followed by a newline.

    Returns:
        constants.OK
    """
    fastqfiles = get_array(cf, 'fastqfiles')
    srafetchxml = cf.get_input('srafetchxml')

    # Context managers close all handles even when parsing or writing fails.
    with open(cf.get_output('left'), 'w') as leftfh, \
            open(cf.get_output('right'), 'w') as rightfh, \
            open(cf.get_output('single'), 'w') as singlefh:
        for accession, fastqfile in fastqfiles:
            fqp = FastqParser()
            # Close each input file as soon as it has been consumed.
            with open(fastqfile, 'U') as fastqfh:
                if isPaired(srafetchxml, accession):
                    # Paired end run.
                    records = fqp.parse(fastqfh)
                    while True:
                        try:
                            pe1 = next(records)
                            pe2 = next(records)
                        except StopIteration:
                            # Input exhausted; an unpaired last record is
                            # discarded here.
                            break
                        leftfh.write(str(pe1) + '\n')
                        rightfh.write(str(pe2) + '\n')
                else:
                    # Single end run.
                    for rec in fqp.parse(fastqfh):
                        singlefh.write(str(rec) + '\n')
    return constants.OK
def fasta_merge(cf):
    """Merge an array of fastafiles.

    Every record yielded by fasta_itr() for each file in the 'fastafiles'
    array is written to the 'output' file as str(record) followed by a
    newline, in array order.

    Returns:
        constants.OK
    """
    # The context manager closes the output even if a read or write fails.
    with open(cf.get_output("output"), "w") as outfh:
        fastafiles = get_array(cf, "fastafiles")
        cf.write_log(str(fastafiles))
        for _key, fastafile in fastafiles:
            for rec in fasta_itr(fastafile):
                outfh.write(str(rec) + "\n")
    return constants.OK
def fastq_merge(cf):
    """Merge an array of fastqfiles.

    Every record parsed by FastqParser from each file in 'in_array' is
    written to the 'output' file as str(record) followed by a newline, in
    array order.

    Returns:
        constants.OK
    """
    # The context manager closes the output even if a read or write fails.
    with open(cf.get_output('output'), 'w') as outfh:
        fastqfiles = get_array(cf, 'in_array')
        cf.write_log(str(fastqfiles))
        fqp = FastqParser()
        for _key, fastqfile in fastqfiles:
            # Close each input once it has been consumed, so handles do not
            # accumulate across the array.
            with open(fastqfile, 'U') as fastqfh:
                for rec in fqp.parse(fastqfh):
                    outfh.write(str(rec) + '\n')
    return constants.OK
def sffmerge(cf):
    """Merge the files of 'in_array' by running the external 'sfffile' tool.

    The command line is 'sfffile -o <output>' followed by every file path
    from 'in_array', in array order.

    Returns:
        constants.OK when sfffile exits with status zero, or
        constants.GENERIC_ERROR (after logging the failure) when it exits
        with a non-zero status.
    """
    inarray = get_array(cf, 'in_array')
    outputfile = cf.get_output('output')
    params = ['-o', outputfile]
    params.extend(inputfile for _key, inputfile in inarray)
    try:
        subprocess.check_call(['sfffile'] + params)
    except subprocess.CalledProcessError as e:
        cf.write_log("Error running sfffile.")
        cf.write_log("Error: %s" % str(e))
        return constants.GENERIC_ERROR
    # Report success explicitly, like the other component functions, instead
    # of falling off the end and returning None.
    return constants.OK
def newbler(cf):
    """Assemble the Newbler argument list and run the assembly.

    Arguments are built in this order:

    * '-o' with the 'assemblydir' output;
    * '-vt' with the 'vectortrimming' input, only when that input is set and
      names an existing, non-empty file;
    * '-large' when the 'large' parameter is true;
    * '-cpu' with the 'threads' parameter, then the fixed flags '-cdna',
      '-m', '-urt' and '-force';
    * every file path from the 'inputfiles' array.

    The list is logged and handed to runNewblerWithRetry() together with
    the 'retries' parameter plus one.

    Returns:
        Whatever runNewblerWithRetry() returns.
    """
    args = ['-o', cf.get_output('assemblydir')]

    vt_path = cf.get_input('vectortrimming')
    use_vt = (bool(vt_path)
              and os.path.isfile(vt_path)
              and os.path.getsize(vt_path) > 0)
    if use_vt:
        args.extend(['-vt', vt_path])

    if cf.get_parameter('large', 'boolean'):
        args.extend(['-large'])

    thread_count = str(cf.get_parameter('threads'))
    args.extend(['-cpu', thread_count, '-cdna', '-m', '-urt', '-force'])

    args.extend([path for _key, path in get_array(cf, 'inputfiles')])

    cf.write_log("Params: %s" % str(args))

    # One initial attempt plus the configured number of retries.
    attempts = cf.get_parameter('retries', 'int') + 1
    return runNewblerWithRetry(cf, args, attempts)