def view(request, response, xfer_msg=None):
    """Render the HTML editing page for one samplesheet.

    The samplesheet is looked up from the 'fcid' named path value of the
    request and read. Each record becomes one editable table row whose
    index sequence is checked; an additional empty row allows a new
    record to be entered. The finished page is appended to the response
    with content type 'text/html'.

    request: provides path_named_values['fcid'].
    response: receives the 'Content-Type' header and the page text.
    xfer_msg: optional message displayed in red above the form.

    Raises HTTP_NOT_FOUND when the samplesheet does not exist.
    """
    sheet = Samplesheet(request.path_named_values['fcid'])
    if not sheet.exists():
        raise HTTP_NOT_FOUND(str(sheet))
    sheet.read()

    def lane_options(chosen):
        # One OPTION per lane number 1..8; only the chosen lane is selected.
        options = []
        for number in xrange(1, 9):
            attrs = {}
            if number == chosen:
                attrs['selected'] = True
            options.append(OPTION(str(number), **attrs))
        return options

    header = TR(TH(),
                TH('FCID'),
                TH('Lane'),
                TH('SampleID', BR(), '(as "ID_index-spec")'),
                TH('SampleRef'),
                TH('Index', BR(), '(sequence)'),
                TH('Description'),
                TH('Control'),
                TH('Recipe'),
                TH('Operator'))

    # Has the extra 'A' been appended previously? True only when there is
    # at least one record and every index is longer than 6 and ends in 'A'.
    append_a = False
    for record in sheet.records:
        append_a = len(record[4]) > 6 and record[4][-1] == 'A'
        if not append_a:
            break

    problems = []                 # Row numbers (as strings) having warnings.
    rows = []
    indices_by_lane = {}          # Key: lane number, value: set of indices.
    # The same index sequence length is required for the entire samplesheet;
    # the first non-empty index sets the expected length.
    expected_length = None
    for pos, record in enumerate(sheet.records):
        lane = record[1]
        index = record[4]
        if expected_length is None and index:
            expected_length = len(index)
        lanes = lane_options(lane)
        samplerefs = _get_sampleref_options(record[3])

        warning = []
        if not index:
            warning.append('Missing sequence!')
        else:
            if set(index.upper()).difference(set('ATGC')):
                warning.append('Invalid nucleotide in index sequence!')
            if expected_length and expected_length != len(index):
                warning.append('Unequal length of index sequence!')
            others = indices_by_lane.get(lane, set())
            if index in others:
                warning.append('Index sequence already used in lane!')
            else:
                for other in others:
                    try:
                        distance = hamming_distance(index, other)
                    except ValueError:
                        # This pair cannot be compared; try the next one.
                        continue
                    if distance < MIN_HAMMING_DISTANCE:
                        warning.append('Too small difference between'
                                       ' this index sequence and'
                                       ' another in lane!')
                        break
            indices_by_lane.setdefault(lane, set()).add(index)
            if interpret_sampleid_for_index(record[2], append_a) != index:
                warning.append('SampleID and index sequence inconsistent!')

        if warning:
            problems.append(str(pos + 1))
        warning = B(' '.join(warning), style='color: red;')

        # Project identifiers are stored with a double underscore in place
        # of the dot, since CASAVA cannot handle a dot. For display, the
        # dot is shown again.
        description = record[5].replace('__', '.')

        cells = [
            TD(str(pos + 1)),
            TD(record[0]),
            TD(SELECT(name="lane%i" % pos, *lanes)),
            TD(INPUT(type='text', name="sampleid%i" % pos,
                     value=record[2], size=30)),
            TD(SELECT(name="sampleref%i" % pos, *samplerefs)),
            TD(INPUT(type='text', name="index%i" % pos,
                     value=index, size=10),
               warning),
            TD(INPUT(type='text', name="description%i" % pos,
                     value=description, size=24)),
            TD(INPUT(type='radio', name="control%i" % pos,
                     value='N', checked=record[6] == 'N'),
               'N ',
               INPUT(type='radio', name="control%i" % pos,
                     value='Y', checked=record[6] == 'Y'),
               'Y'),
            TD(INPUT(type='text', name="recipe%i" % pos,
                     value=record[7], size=4)),
            TD(INPUT(type='text', name="operator%i" % pos,
                     value=record[8], size=4)),
        ]
        rows.append(TR(*cells))

    # The empty row for a new record defaults to the lane and sample
    # reference of the last existing record, if there is one.
    try:
        last_record = sheet.records[-1]
        previous_lane, previous_sampleref = last_record[1], last_record[3]
    except IndexError:
        previous_lane = previous_sampleref = None
    lanes = lane_options(previous_lane)
    samplerefs = _get_sampleref_options(previous_sampleref)
    blank_cells = [
        TD(str(len(sheet.records) + 1)),
        TD(sheet.fcid),
        TD(SELECT(name='lane', multiple=True, *lanes)),
        TD(INPUT(type='text', name='sampleid', size=30)),
        TD(SELECT(name='sampleref', *samplerefs)),
        TD(INPUT(type='text', name='index', size=10)),
        TD(INPUT(type='text', name='description', size=24)),
        TD(INPUT(type='radio', checked=True, name='control', value='N'),
           'N ',
           INPUT(type='radio', name='control', value='Y'),
           'Y'),
        TD(INPUT(type='text', name='recipe', size=4)),
        TD(INPUT(type='text', name='operator', size=4)),
    ]
    rows.append(TR(*blank_cells))

    # Display order: header first, then the empty row, then the records
    # from last to first.
    rows.reverse()
    rows.insert(0, header)
    table = TABLE(border=1, *rows)

    index_spec_rows = [TR(TH('Index type'),
                          TH('Standard name'),
                          TH('Alternate short name'))]
    for spec in [('Ordinary Illumina', 'samplename_index3', 'samplename_3'),
                 ('Small RNA', 'samplename_rpi6', 'samplename_r6'),
                 ('Agilent', 'samplename_agilent14', 'samplename_a14'),
                 ('Mondrian', 'samplename_mondrian11', 'samplename_m11'),
                 ('Haloplex', 'samplename_halo11', 'samplename_h11')]:
        index_spec_rows.append(TR(*[TD(text) for text in spec]))
    index_spec_table = TABLE(border=1, *index_spec_rows)

    instructions = P(
        UL(LI('To add several records, cut-and-paste'
              ' from the Google Docs spreadsheet'
              ' into the text box to the right.'),
           LI('To add another record,'
              ' fill in values in the first row.'),
           LI('To delete a record, set its SampleID'
              ' to a blank character.'),
           LI('To modify a record, change the value'
              ' in the field.'),
           LI('NOTE: Sample and project identifiers are now'
              ' strictly controlled: Offensive characters are'
              ' automatically converted to underscores.'),
           LI('Specify index number for the sample like so:',
              index_spec_table),
           LI('Click "Save" to store the samplesheet.'
              ' Comicbookguy will fetch it automatically'
              ' within 15 minutes.')))

    paste_form = FORM(I('Cut-and-paste 4 columns'
                        ' (Lane, Sample, Project, Ref.genome).'),
                      TEXTAREA(name='cutandpaste', cols=40, rows=4),
                      INPUT(type='submit', value='Add'),
                      method='POST',
                      action=sheet.url)
    sort_form = FORM(INPUT(type='submit', value='Sort samplesheet records'),
                     INPUT(type='hidden', name='sort', value='default'),
                     method='POST',
                     action=sheet.url)
    # (A 'Download CSV file (obsolete)' form was previously offered here;
    # it is disabled.)
    delete_form = FORM(INPUT(type='submit',
                             value='Delete this samplesheet',
                             onclick="return confirm('Really delete?');"),
                       INPUT(type='hidden', name='http_method',
                             value='DELETE'),
                       method='POST',
                       action=sheet.url)
    ops = TABLE(TR(TD(paste_form)),
                TR(TD(sort_form)),
                TR(TD(delete_form)),
                width='100%')

    notices = []
    if xfer_msg:
        notices.append(P(xfer_msg))
    if problems:
        notices.append(P("There are problems regarding records %s!"
                         % ', '.join(problems)))
    warning = DIV(style='color: red;', *notices)

    # (An 'append_a' checkbox was previously part of this form; it is
    # disabled.)
    form = FORM(P(INPUT(type='submit', value='Save')),
                P(table),
                method='POST',
                action=sheet.url)

    page = HTML(HEAD(TITLE(str(sheet))),
                BODY(A('Home', href=get_url()),
                     H1(str(sheet)),
                     TABLE(TR(TD(instructions), TD(ops))),
                     warning,
                     form))
    response['Content-Type'] = 'text/html'
    response.append(str(page))
def view(request, response, xfer_msg=None):
    """Render the HTML page for viewing and editing one samplesheet.

    The samplesheet is looked up from the 'fcid' named path value of the
    request and read. Each record becomes one editable table row; its
    SampleID, SampleRef, index sequence and project identifier are
    checked and any warnings are shown in red inside the row. An extra
    empty row allows a new record to be entered. The finished page is
    appended to the response with content type 'text/html'.

    request: provides path_named_values['fcid'].
    response: receives the 'Content-Type' header and the page text.
    xfer_msg: optional message displayed in red above the form.

    Returns None without rendering anything when
    invalid_data_dir(request, response) is true.
    Raises HTTP_NOT_FOUND when the samplesheet does not exist.
    """
    if invalid_data_dir(request, response):
        return
    samplesheet = Samplesheet(request.path_named_values['fcid'])
    # NOTE(review): 'exists' is read as an attribute here, not called. If
    # Samplesheet.exists is a plain method this test can never fire;
    # confirm that it is a property.
    if not samplesheet.exists:
        raise HTTP_NOT_FOUND(str(samplesheet))
    samplesheet.read()

    def lane_options(selected_lane):
        # One OPTION per lane number 1..8; only the given lane is selected.
        options = []
        for i in xrange(1, 9):
            if i == selected_lane:
                options.append(OPTION(str(i), selected=True))
            else:
                options.append(OPTION(str(i)))
        return options

    problems = set()              # 1-based numbers of rows having warnings.
    header = TR(TH(),
                TH('FCID'),
                TH('Lane'),
                TH('SampleID + index-spec',
                   BR(),
                   '(format: see above)',
                   width='20%'),
                TH('SampleRef'),
                TH('Index', BR(), '(sequence)'),
                TH('ProjectID'),
                TH('Control'),
                TH('Recipe'),
                TH('Operator'))
    rows = []
    # Key: lane number, value: set of tuples (seq index, SampleID).
    seqindex_lookup = dict()

    # Figure out whether that extra A has been appended previously: true
    # only when there is at least one record and every index sequence is
    # longer than 6 and ends in 'A'.
    append_a = False
    for record in samplesheet.records:
        append_a = len(record[4]) > 6 and record[4][-1] == 'A'
        if not append_a:
            break

    # Require same index sequence length within each lane. Keyed by lane,
    # so that an unexpected lane value cannot fall outside a fixed-size
    # table.
    index_sequence_lengths = dict()

    for pos, record in enumerate(samplesheet.records):
        lane = record[1]
        index = record[4]
        lanes = lane_options(lane)
        samplerefs = _get_sampleref_options(record[3])
        sample_warning = []
        project_warning = []

        # Check valid sampleid, first as given and then with its last
        # underscore-separated part (the index spec) stripped off.
        sampleid = record[2]
        if not SAMPLEID_RX.match(sampleid):
            sampleid = '_'.join(sampleid.split('_')[:-1])
            if not SAMPLEID_RX.match(sampleid):
                sample_warning.append('Invalid SampleID.')
        if record[3] == 'unknown':
            sample_warning.append('Unknown SampleRef.')

        if index:
            # Check index sequence; '-' for dual
            if set(index.upper()).difference(set('ATGC-')):
                sample_warning.append('Invalid nucleotide in index sequence.')
            # The first index seen in a lane sets that lane's length.
            expected_length = index_sequence_lengths.setdefault(lane,
                                                                len(index))
            if expected_length != len(index):
                sample_warning.append('Unequal length of index sequence'
                                      ' in lane.')
            other_seqindices = seqindex_lookup.get(lane, set())
            # The lookup holds (seq index, SampleID) tuples, so the index
            # must be compared against the first item of each tuple; a
            # plain membership test on the set would never match.
            if any(index == other_seqindex
                   for other_seqindex, other_sampleid in other_seqindices):
                sample_warning.append('Index sequence already used in lane.')
            else:
                for other_seqindex, other_sampleid in other_seqindices:
                    ld = levenshtein_distance(index, other_seqindex,
                                              shortest=True)
                    if ld < MIN_LEVENSHTEIN_DISTANCE:
                        sample_warning.append('Too small Levenshtein distance'
                                              ' between this index sequence'
                                              ' and sample %s in lane.'
                                              % other_sampleid)
                        break
                    hd = hamming_distance(index, other_seqindex,
                                          shortest=True)
                    if hd < MIN_HAMMING_DISTANCE:
                        sample_warning.append('Too small Hamming distance'
                                              ' between this index sequence'
                                              ' and sample %s in lane.'
                                              % other_sampleid)
                        break
            seqindex_lookup.setdefault(lane, set()).add((index, record[2]))
            indexseq = interpret_sampleid_for_index(record[2], append_a)
            if indexseq and indexseq != index:
                sample_warning.append('SampleID and index sequence'
                                      ' inconsistent.')
        else:
            sample_warning.append('Missing sequence.')

        if sample_warning:
            problems.add(pos+1)
        sample_warning = B('<br>'.join(sample_warning), style='color: red;')

        # The abominable dot '.' in project identifiers is stored as
        # double underscore, since CASAVA cannot handle dot.
        # For display purposes, the dot is shown instead of double underscore.
        description = record[5].replace('__', '.')
        # The stored form (with double underscore) is what gets validated.
        if not PROJECTID_RX.match(record[5]):
            project_warning.append('Project ID is malformed')
        if project_warning:
            problems.add(pos+1)
        project_warning = B('<br>'.join(project_warning),
                            style='color: red;')

        rows.append(TR(TD(str(pos+1)),
                       TD(record[0]),
                       TD(SELECT(name="lane%i" % pos, *lanes)),
                       TD(INPUT(type='text', name="sampleid%i" % pos,
                                value=record[2], size=24),
                          sample_warning),
                       TD(SELECT(name="sampleref%i" % pos, *samplerefs)),
                       TD(INPUT(type='text', name="index%i" % pos,
                                value=index, size=16)),
                       TD(INPUT(type='text', name="description%i" % pos,
                                value=description, size=24),
                          project_warning),
                       TD(INPUT(type='radio', name="control%i" % pos,
                                value='N', checked=record[6]=='N'),
                          'N ',
                          BR(),
                          INPUT(type='radio', name="control%i" % pos,
                                value='Y', checked=record[6]=='Y'),
                          'Y'),
                       TD(INPUT(type='text', name="recipe%i" % pos,
                                value=record[7], size=4)),
                       TD(INPUT(type='text', name="operator%i" % pos,
                                value=record[8], size=4))))

    # The empty row for a new record defaults to the lane and sample
    # reference of the last existing record, if there is one.
    try:
        previous_lane = samplesheet.records[-1][1]
        previous_sampleref = samplesheet.records[-1][3]
    except IndexError:
        previous_lane = None
        previous_sampleref = None
    lanes = lane_options(previous_lane)
    samplerefs = _get_sampleref_options(previous_sampleref)
    rows.append(TR(TD(str(len(samplesheet.records)+1)),
                   TD(samplesheet.fcid),
                   TD(SELECT(name='lane', multiple=True, *lanes)),
                   TD(INPUT(type='text', name='sampleid', size=24)),
                   TD(SELECT(name='sampleref', *samplerefs)),
                   TD(INPUT(type='text', name='index', size=16)),
                   TD(INPUT(type='text', name='description', size=24)),
                   TD(INPUT(type='radio', checked=True,
                            name='control', value='N'),
                      'N ',
                      BR(),
                      INPUT(type='radio', name='control', value='Y'),
                      'Y'),
                   TD(INPUT(type='text', name='recipe', size=4)),
                   TD(INPUT(type='text', name='operator', size=4))))

    # Display order: header first, then the empty row, then the records
    # from last to first.
    rows.reverse()
    rows.insert(0, header)
    table = TABLE(border=1, cellpadding=2, *rows)

    # Index type, standard index spec and (where one exists) the
    # alternate short index spec.
    index_specs = [
        ('Illumina', 'sampleid_index3', 'sampleid_i3'),
        ('Small RNA', 'sampleid_rpi6', 'sampleid_r6'),
        ('Agilent', 'sampleid_agilent14', 'sampleid_a14'),
        ('Mondrian', 'sampleid_mondrian11', 'sampleid_m11'),
        ('Haloplex', 'sampleid_halo11', 'sampleid_h11'),
        ('Haloplex HT 8-bp', 'sampleid_haloht31', 'sampleid_hht31'),
        ('SureSelect', 'sampleid_sureselect9', 'sampleid_ss9'),
        ('TruSeq DNA Dual HT', 'sampleid_dual13'),
        ('Nextera Dual HT', 'sampleid_nxdual15'),
        ('Halo HT Dual', 'sampleid_haloht15dual'),
        ('Illumina Dual', 'sampleid_index15dual'),
        ('Agilent SureSelect XT', 'sampleid_xtd04'),
    ]
    index_spec_rows = [TR(TH('Index type'),
                          TH('Standard index spec'),
                          TH('Alternate short index spec'))]
    for spec in index_specs:
        index_spec_rows.append(TR(*[TD(text) for text in spec]))
    index_spec_table = TABLE(border=1, cellpadding=2, *index_spec_rows)

    instructions = P(
        UL(LI('To add several records, cut-and-paste'
              ' from the Google Docs spreadsheet'
              ' into the text box to the right, then save.'),
           LI('To add another record, fill in values'
              ' in the first row, then save.'),
           LI('To delete a record, set its SampleID'
              ' to a blank character, then save.'),
           LI('To modify a record, change the value'
              ' in the field, then save.'),
           LI('Offensive characters in Project Identifiers'
              ' will be automatically converted'
              ' to underscores.'),
           LI('SampleID must look like ',
              B('P123_456'),
              ', possibly with any of the'
              ' characters B, C, D or F attached.'),
           LI('If the index suffix looks like a nucleotide'
              ' sequence with at least 6 bases, it will be used.'),
           LI('Specify index number for the sample by adding the'
              ' appropriate suffix using underscore, like so:',
              index_spec_table)))

    ops = TABLE(TR(TD(FORM(I('Cut-and-paste 4 columns'
                             ' (Lane, Sample, Project, Ref.genome).'),
                           TEXTAREA(name='cutandpaste', cols=40, rows=4),
                           INPUT(type='submit', value='Add'),
                           method='POST',
                           action=samplesheet.url))),
                TR(TD(FORM(INPUT(type='submit',
                                 value='Sort samplesheet records'),
                           INPUT(type='hidden', name='sort',
                                 value='default'),
                           method='POST',
                           action=samplesheet.url))),
                TR(TD(FORM(INPUT(type='submit',
                                 value='Delete this samplesheet',
                                 onclick="return confirm('Really delete?');"),
                           INPUT(type='hidden', name='http_method',
                                 value='DELETE'),
                           method='POST',
                           action=samplesheet.url))),
                width='100%')

    title = "%s (%s)" % (samplesheet,
                         A("CSV file", href=samplesheet.file_url))

    warning = []
    if xfer_msg:
        warning.append(P(xfer_msg))
    if problems:
        problems = ', '.join(map(str, sorted(problems)))
        warning.append(P("There are problems regarding records %s!"
                         % problems))
    warning = DIV(style='color: red;', *warning)

    form = FORM(P(INPUT(type='submit', value='Save'),
                  ' Store the samplesheet. The pipeline computer (comicbookguy)'
                  ' will fetch it automatically within 15 minutes.'),
                P(table),
                method='POST',
                action=samplesheet.url)

    response['Content-Type'] = 'text/html'
    response.append(str(HTML(HEAD(TITLE(str(samplesheet))),
                             BODY(A('Home', href=get_url()),
                                  H1(title),
                                  TABLE(TR(TD(instructions), TD(ops))),
                                  warning,
                                  form))))