Exemple #1
0
 def test_run(self):
     """Exercise formatLine on a two-column entry (no assertions yet)."""
     # DAD: todo -- the formatted line is produced but not checked.
     # Each spec is passed positionally to addHeader; the third item looks
     # like a format code ('d' / 's') -- confirm against util.
     header_specs = (
         ("one", "first", 'd'),
         ("two", "second", 's'),
     )
     writer = util.DelimitedOutput()
     for spec in header_specs:
         writer.addHeader(*spec)
     record = {'one': 2, 'two': 'this'}
     formatted = writer.formatLine(record)
Exemple #2
0
    # Remove gaps?
    if options.degap:
        for k in query_keys:
            prot_dict[k] = prot_dict[k].replace("-", '')

    # In debugging mode, restrict the run to at most the first 100 keys.
    # The slice clamps to the sequence length by itself, so no explicit
    # min()/len() is needed (len() accepts exactly one argument).
    if options.debugging:
        query_keys = query_keys[:100]

    # Set up motif to compare
    fac = slide.SequenceCompositionComparatorFactory()
    comparator = fac.make(options.motif, 'sequence')

    # Write output: declare the four output columns, then emit the header
    # description and the header row to data_outs.
    n_written = 0
    dout = util.DelimitedOutput()
    dout.addHeader('orf', 'S. cerevisiae systematic name', 's')
    dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd')
    dout.addHeader(
        'max.score',
        'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)',
        'f')
    dout.addHeader(
        'max.position',
        '1-based sequence position of window (start of window) having the maximum score',
        'd')
    dout.describeHeader(data_outs)

    dout.writeHeader(data_outs)
    for orf in query_keys:
        seq = gelscore.Sequence(prot_dict[orf])
Exemple #3
0
def processFile(in_fname, out_fname, options, data_outs, info_outs):
    """Write volume-binned averages of the 'UV' column of a result file.

    The file is loaded through ``pc_res3``. A commented run header is
    written to ``data_outs``, followed by one row per window of consecutive
    readings. A window is closed as soon as a reading lies
    ``options.resolution_ml`` or more beyond the window's first reading;
    each row holds the mean volume and mean value of its window. Readings
    left in the last window are written too, so no trailing data is lost,
    and an empty trace simply yields no rows.

    Args:
        in_fname: Input file name passed to ``pc_res3``.
        out_fname: Output file name; only echoed in messages. When it is
            ``None`` the closing summary line is skipped.
        options: Options object; every attribute is echoed in the header
            and ``resolution_ml`` sets the window width.
        data_outs: Writable stream receiving the header and data rows.
        info_outs: Writable stream receiving the closing summary line.
    """
    resfile = pc_res3(in_fname)

    # Parse the file
    resfile.load()

    # Only the UV column is exported.
    target_column = 'UV'
    trace = resfile[target_column]

    # Write out parameters
    data_outs.write("# Run started {}\n".format(util.timestamp()))
    data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
    data_outs.write("# Parameters:\n")
    for (k, v) in vars(options).items():
        data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))
    data_outs.write("# Input filename: {}\n".format(in_fname))
    data_outs.write("# Output filename: {}\n".format(out_fname))

    # Write output
    dout = util.DelimitedOutput()
    dout.addHeader('ml', 'Elution volume (mL)', 'f')
    dout.addHeader(
        target_column,
        '{name} ({unit})'.format(name=trace['data_name'],
                                 unit=trace['unit']), 'f')
    dout.describeHeader(data_outs)

    dout.writeHeader(data_outs)
    line_format = dout.getFormat(named=True)

    def write_window(window):
        # Collapse one window of (mL, UV) tuples into a single averaged row.
        row = {
            'ml': stats.mean([point[0] for point in window]),
            target_column: stats.mean([point[1] for point in window]),
        }
        data_outs.write(line_format.format(**row))

    n_written = 0
    # Tuples of (mL, UV) collected for the window currently being filled
    elution_window = []
    for entry in trace['data']:
        volume = entry[0]
        # Close the window once this reading is far enough from the
        # window's first reading; never flush an empty window.
        if elution_window and \
                volume - elution_window[0][0] >= options.resolution_ml:
            write_window(elution_window)
            n_written += 1
            elution_window = []
        elution_window.append((volume, entry[1]))

    # Flush whatever remains so the final readings are not dropped.
    if elution_window:
        write_window(elution_window)
        n_written += 1

    # Write out stopping time
    data_outs.write("# Run finished {}\n".format(util.timestamp()))

    # Report how many rows were written
    if out_fname is not None:
        info_outs.write("# Wrote {} lines to {}\n".format(
            n_written, out_fname))