def test_run(self):
    """formatLine: build a two-column DelimitedOutput and format one entry."""
    # TODO(DAD): add an assertion on `line`; the placeholder
    # self.assertFalse(True) was left disabled.
    dout = util.DelimitedOutput()
    # (column name, description, format code) for each header, in order.
    header_specs = (
        ("one", "first", 'd'),
        ("two", "second", 's'),
    )
    for column_name, description, format_code in header_specs:
        dout.addHeader(column_name, description, format_code)
    entry = dict(one=2, two='this')
    line = dout.formatLine(entry)
# Remove gaps?
if options.degap:
    for k in query_keys:
        prot_dict[k] = prot_dict[k].replace("-", '')
if options.debugging:
    # Debug runs only look at the first 100 queries.  Slicing already clamps
    # to the sequence length, so no explicit min() is needed; the previous
    # min(len(query_keys, 100)) raised TypeError because len() takes exactly
    # one argument.
    query_keys = query_keys[:100]

# Set up motif to compare
fac = slide.SequenceCompositionComparatorFactory()
comparator = fac.make(options.motif, 'sequence')

# Write output: declare the columns, then emit their description and the
# header row to data_outs before any data lines.
n_written = 0
dout = util.DelimitedOutput()
dout.addHeader('orf', 'S. cerevisiae systematic name', 's')
dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd')
dout.addHeader(
    'max.score',
    'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)',
    'f')
dout.addHeader(
    'max.position',
    '1-based sequence position of window (start of window) having the maximum score',
    'd')
dout.describeHeader(data_outs)
dout.writeHeader(data_outs)
for orf in query_keys:
    seq = gelscore.Sequence(prot_dict[orf])
def _iterElutionWindows(data, resolution_ml):
    """Yield consecutive windows of (volume, value) tuples taken from data.

    A window is closed as soon as an entry lies at least resolution_ml past
    the volume at which the window was opened; that entry then opens the next
    window.  Every entry lands in exactly one window (including the final
    entry), and an empty window is never yielded, so callers can average each
    window safely.  Empty data yields nothing.
    """
    window = []
    window_start = None
    for entry in data:
        volume, value = entry[0], entry[1]
        if window and volume - window_start >= resolution_ml:
            yield window
            window = []
        if not window:
            # This entry opens a new window; measure the next gap from here.
            window_start = volume
        window.append((volume, value))
    if window:
        # Flush whatever is left so the tail of the run is not dropped.
        yield window


def processFile(in_fname, out_fname, options, data_outs, info_outs):
    """Average the 'UV' column of a result file over elution-volume windows.

    The file is loaded through pc_res3, a commented run header (timestamp,
    command line, parameters, file names) is written to data_outs, and then
    one delimited line per window is written holding the mean volume and the
    mean column value of that window.

    Args:
        in_fname: path handed to pc_res3.
        out_fname: output file name; only used in the report lines.  When it
            is None the final "Wrote ..." summary is skipped.
        options: parsed options object; must work with vars() and provide
            resolution_ml, the minimum volume span of one window.
        data_outs: object with write() that receives header and data lines.
        info_outs: object with write() that receives the final summary.
    """
    resfile = pc_res3(in_fname)
    # Parse the file
    resfile.load()
    target_column = 'UV'
    column = resfile[target_column]

    # Write out parameters
    data_outs.write("# Run started {}\n".format(util.timestamp()))
    data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
    data_outs.write("# Parameters:\n")
    optdict = vars(options)
    for (k, v) in optdict.items():
        data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))
    data_outs.write("# Input filename: {}\n".format(in_fname))
    data_outs.write("# Output filename: {}\n".format(out_fname))

    # Write output
    dout = util.DelimitedOutput()
    dout.addHeader('ml', 'Elution volume (mL)', 'f')
    dout.addHeader(
        target_column,
        '{name} ({unit})'.format(name=column['data_name'],
                                 unit=column['unit']),
        'f')
    dout.describeHeader(data_outs)
    dout.writeHeader(data_outs)
    # Named format string, filled from a dict keyed by column name.
    line_format = dout.getFormat(named=True)

    n_written = 0
    for window in _iterElutionWindows(column['data'], options.resolution_ml):
        # Compute averages over the (mL, value) tuples of this window
        datdict = {
            'ml': stats.mean([x[0] for x in window]),
            target_column: stats.mean([x[1] for x in window]),
        }
        # Write out
        data_outs.write(line_format.format(**datdict))
        n_written += 1

    # Write out stopping time
    data_outs.write("# Run finished {}\n".format(util.timestamp()))

    # Shut down output
    if out_fname is not None:
        info_outs.write("# Wrote {} lines to {}\n".format(
            n_written, out_fname))