Python hamming_distanceの例

プログラミング言語: Python

名前空間/パッケージ名: samplesheet.annotate_index

メソッド/関数: hamming_distance

hotexamples.comのコード掲載数: 2

Python hamming_distance - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsamplesheet.annotate_index.hamming_distanceの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: wsgi_application.py プロジェクト: b97pla/samplesheet

def view(request, response, xfer_msg=None):
    """Render the HTML page for viewing and editing one samplesheet.

    The samplesheet is identified by the 'fcid' entry of
    request.path_named_values. Every record is shown as one editable
    table row, and its index sequence is checked for invalid nucleotides,
    unequal length, duplicates within the lane, too small Hamming distance
    to another index in the same lane, and consistency with the SampleID.
    An extra blank row for entering a new record is added as well. The
    rows are displayed in reverse order, with the blank row first.

    Arguments:
      request: request object providing path_named_values['fcid'].
      response: response object; its 'Content-Type' item is set to
        'text/html' and the complete HTML document is appended to it.
      xfer_msg: optional message displayed in red above the form.

    Raises:
      HTTP_NOT_FOUND: if the samplesheet does not exist.
    """
    samplesheet = Samplesheet(request.path_named_values['fcid'])
    if not samplesheet.exists():
        raise HTTP_NOT_FOUND(str(samplesheet))
    samplesheet.read()
    problems = []  # 1-based record numbers (as strings) having warnings
    header = TR(TH(), TH('FCID'), TH('Lane'),
                TH('SampleID', BR(), '(as "ID_index-spec")'), TH('SampleRef'),
                TH('Index', BR(), '(sequence)'), TH('Description'),
                TH('Control'), TH('Recipe'), TH('Operator'))
    rows = []
    seqindex_lookup = dict()  # Key: lane number, value: set of seq indices
    # Figure out whether that extra A has been appended previously:
    # True only if every record has an index sequence longer than
    # 6 characters ending in 'A'; False for an empty samplesheet.
    append_a = None
    for record in samplesheet.records:
        if append_a is None or append_a:
            append_a = len(record[4]) > 6 and record[4][-1] == 'A'
    if append_a is None:
        append_a = False
    index_sequence_length = None
    for pos, record in enumerate(samplesheet.records):
        # Same length of index sequence required for entire samplesheet!
        # The first non-empty index sequence defines the reference length.
        if index_sequence_length is None and record[4]:
            index_sequence_length = len(record[4])
        lanes = []
        for i in xrange(1, 9):
            if i == record[1]:
                lanes.append(OPTION(str(i), selected=True))
            else:
                lanes.append(OPTION(str(i)))
        samplerefs = _get_sampleref_options(record[3])
        record_warnings = []
        if record[4]:  # Check index sequence
            if set(record[4].upper()).difference(set('ATGC')):
                record_warnings.append('Invalid nucleotide in index sequence!')
            if index_sequence_length:
                if index_sequence_length != len(record[4]):
                    record_warnings.append('Unequal length of index sequence!')
            other_seqindices = seqindex_lookup.get(record[1], set())
            if record[4] in other_seqindices:
                record_warnings.append('Index sequence already used in lane!')
            else:
                for other_seqindex in other_seqindices:
                    try:
                        hd = hamming_distance(record[4], other_seqindex)
                    except ValueError:
                        # NOTE(review): presumably raised for sequences
                        # that cannot be compared (e.g. unequal length);
                        # such pairs are deliberately skipped. Confirm
                        # against hamming_distance.
                        pass
                    else:
                        if hd < MIN_HAMMING_DISTANCE:
                            record_warnings.append(
                                'Too small difference between'
                                ' this index sequence and'
                                ' another in lane!')
                            break
                seqindex_lookup.setdefault(record[1], set()).add(record[4])
            if interpret_sampleid_for_index(record[2], append_a) != record[4]:
                record_warnings.append(
                    'SampleID and index sequence inconsistent!')
        else:
            record_warnings.append('Missing sequence!')
        if record_warnings:
            problems.append(str(pos + 1))
        warning_element = B(' '.join(record_warnings), style='color: red;')
        # The abominable dot '.' in project identifiers is stored as
        # double underscore, since CASAVA cannot handle dot.
        # For display purposes, the dot is shown instead of double underscore.
        description = record[5].replace('__', '.')
        rows.append(
            TR(
                TD(str(pos + 1)), TD(record[0]),
                TD(SELECT(name="lane%i" % pos, *lanes)),
                TD(
                    INPUT(type='text',
                          name="sampleid%i" % pos,
                          value=record[2],
                          size=30)),
                TD(SELECT(name="sampleref%i" % pos, *samplerefs)),
                TD(
                    INPUT(type='text',
                          name="index%i" % pos,
                          value=record[4],
                          size=10), warning_element),
                TD(
                    INPUT(type='text',
                          name="description%i" % pos,
                          value=description,
                          size=24)),
                TD(
                    INPUT(type='radio',
                          name="control%i" % pos,
                          value='N',
                          checked=record[6] == 'N'), 'N ',
                    INPUT(type='radio',
                          name="control%i" % pos,
                          value='Y',
                          checked=record[6] == 'Y'), 'Y'),
                TD(
                    INPUT(type='text',
                          name="recipe%i" % pos,
                          value=record[7],
                          size=4)),
                TD(
                    INPUT(type='text',
                          name="operator%i" % pos,
                          value=record[8],
                          size=4))))
    # The blank row for a new record defaults to the lane and sample
    # reference of the last existing record, if there is one.
    try:
        previous_lane = samplesheet.records[-1][1]
        previous_sampleref = samplesheet.records[-1][3]
    except IndexError:
        previous_lane = None
        previous_sampleref = None
    lanes = []
    for i in xrange(1, 9):
        if i == previous_lane:
            lanes.append(OPTION(str(i), selected=True))
        else:
            lanes.append(OPTION(str(i)))
    # Does not depend on the lane number, so compute it once, outside
    # of the lane loop.
    samplerefs = _get_sampleref_options(previous_sampleref)
    rows.append(
        TR(
            TD(str(len(samplesheet.records) + 1)), TD(samplesheet.fcid),
            TD(SELECT(name='lane', multiple=True, *lanes)),
            TD(INPUT(type='text', name='sampleid', size=30)),
            TD(SELECT(name='sampleref', *samplerefs)),
            TD(INPUT(type='text', name='index', size=10)),
            TD(INPUT(type='text', name='description', size=24)),
            TD(INPUT(type='radio', checked=True, name='control', value='N'),
               'N ', INPUT(type='radio', name='control', value='Y'), 'Y'),
            TD(INPUT(type='text', name='recipe', size=4)),
            TD(INPUT(type='text', name='operator', size=4))))
    # Show the blank row first, then the records from last to first;
    # the header always goes on top.
    rows.reverse()
    rows.insert(0, header)
    table = TABLE(border=1, *rows)
    instructions = P(
        UL(
            LI('To add several records, cut-and-paste'
               ' from the Google Docs spreadsheet'
               ' into the text box to the right.'),
            LI('To add another record,'
               ' fill in values in the first row.'),
            LI('To delete a record, set its SampleID'
               ' to a blank character.'),
            LI('To modify a record, change the value'
               ' in the field.'),
            LI('NOTE: Sample and project identifiers are now'
               ' strictly controlled: Offensive characters are'
               ' automatically converted to underscores.'),
            LI(
                'Specify index number for the sample like so:',
                TABLE(TR(TH('Index type'), TH('Standard name'),
                         TH('Alternate short name')),
                      TR(TD('Ordinary Illumina'), TD('samplename_index3'),
                         TD('samplename_3')),
                      TR(TD('Small RNA'), TD('samplename_rpi6'),
                         TD('samplename_r6')),
                      TR(TD('Agilent'), TD('samplename_agilent14'),
                         TD('samplename_a14')),
                      TR(TD('Mondrian'), TD('samplename_mondrian11'),
                         TD('samplename_m11')),
                      TR(TD('Haloplex'), TD('samplename_halo11'),
                         TD('samplename_h11')),
                      border=1)),
            LI('Click "Save" to store the samplesheet.'
               ' Comicbookguy will fetch it automatically'
               ' within 15 minutes.')))
    ops = TABLE(
        TR(
            TD(
                FORM(I('Cut-and-paste 4 columns'
                       ' (Lane, Sample, Project, Ref.genome).'),
                     TEXTAREA(name='cutandpaste', cols=40, rows=4),
                     INPUT(type='submit', value='Add'),
                     method='POST',
                     action=samplesheet.url))),
        TR(
            TD(
                FORM(INPUT(type='submit', value='Sort samplesheet records'),
                     INPUT(type='hidden', name='sort', value='default'),
                     method='POST',
                     action=samplesheet.url))),
        ## TR(TD(FORM(INPUT(type='submit',
        ##                  value='Download CSV file (obsolete)'),
        ##            method='GET',
        ##            action=samplesheet.file_url))),
        TR(
            TD(
                FORM(INPUT(type='submit',
                           value='Delete this samplesheet',
                           onclick="return confirm('Really delete?');"),
                     INPUT(type='hidden', name='http_method', value='DELETE'),
                     method='POST',
                     action=samplesheet.url))),
        width='100%')
    # Page-level messages: the optional transfer message and a summary
    # of all records that have at least one warning.
    page_warnings = []
    if xfer_msg:
        page_warnings.append(P(xfer_msg))
    if problems:
        page_warnings.append(
            P("There are problems regarding records %s!" %
              ', '.join(problems)))
    warning_box = DIV(style='color: red;', *page_warnings)
    form = FORM(
        P(INPUT(type='submit', value='Save')),
        ##               INPUT(type='checkbox', name='append_a',
        ##                     value='y', checked=append_a),
        ##               " Append an 'A' to a newly defined index sequence."),
        P(table),
        method='POST',
        action=samplesheet.url)
    response['Content-Type'] = 'text/html'
    response.append(
        str(
            HTML(
                HEAD(TITLE(str(samplesheet))),
                BODY(A('Home', href=get_url()), H1(str(samplesheet)),
                     TABLE(TR(TD(instructions), TD(ops))), warning_box,
                     form))))

コード例 #2

ファイルを表示

ファイル: wsgi_application.py プロジェクト: pekrau/samplesheet

def view(request, response, xfer_msg=None):
    """Render the HTML page for viewing and editing one samplesheet.

    The samplesheet is identified by the 'fcid' entry of
    request.path_named_values. Every record is shown as one editable
    table row. The SampleID, SampleRef, index sequence and ProjectID of
    each record are checked, and any warnings are shown in red next to
    the corresponding field. An extra blank row for entering a new record
    is added as well. The rows are displayed in reverse order, with the
    blank row first.

    Arguments:
      request: request object providing path_named_values['fcid'].
      response: response object; its 'Content-Type' item is set to
        'text/html' and the complete HTML document is appended to it.
      xfer_msg: optional message displayed in red above the form.

    Raises:
      HTTP_NOT_FOUND: if the samplesheet does not exist.
    """
    # NOTE(review): presumably invalid_data_dir has already written an
    # error to the response when it returns a true value; confirm.
    if invalid_data_dir(request, response):
        return
    samplesheet = Samplesheet(request.path_named_values['fcid'])
    if not samplesheet.exists:
        raise HTTP_NOT_FOUND(str(samplesheet))
    samplesheet.read()
    problems = set()  # 1-based record numbers having warnings
    header = TR(TH(),
                TH('FCID'),
                TH('Lane'),
                TH('SampleID + index-spec',
                   BR(),
                   '(format: see above)',
                   width='20%'),
                TH('SampleRef'),
                TH('Index', BR(), '(sequence)'),
                TH('ProjectID'),
                TH('Control'),
                TH('Recipe'),
                TH('Operator'))
    rows = []
    # Key: lane number, value: set of tuples (seq index, SampleID)
    seqindex_lookup = dict()
    # Figure out whether that extra A has been appended previously:
    # True only if every record has an index sequence longer than
    # 6 characters ending in 'A'; False for an empty samplesheet.
    append_a = None
    for record in samplesheet.records:
        if append_a is None or append_a:
            append_a = len(record[4]) > 6 and record[4][-1] == 'A'
    if append_a is None:
        append_a = False
    # Require same index sequence length within each lane.
    index_sequence_lengths = [None] * 9     # 1-based index for max 8 lanes.
    for pos, record in enumerate(samplesheet.records):
        lane = record[1]
        lanes = []
        for i in xrange(1, 9):
            if i == lane:
                lanes.append(OPTION(str(i), selected=True))
            else:
                lanes.append(OPTION(str(i)))
        samplerefs = _get_sampleref_options(record[3])
        sample_warning = []
        project_warning = []
        # Check valid sampleid; if the full SampleID does not match,
        # retry with the last underscore-separated part removed.
        sampleid = record[2]
        if not SAMPLEID_RX.match(sampleid):
            sampleid = '_'.join(sampleid.split('_')[:-1])
            if not SAMPLEID_RX.match(sampleid):
                sample_warning.append('Invalid SampleID.')
        if record[3] == 'unknown':
            sample_warning.append('Unknown SampleRef.')
        if record[4]:                   # Check index sequence; '-' for dual
            if set(record[4].upper()).difference(set('ATGC-')):
                sample_warning.append('Invalid nucleotide in index sequence.')
            if index_sequence_lengths[lane] is None:
                index_sequence_lengths[lane] = len(record[4])
            else:
                if index_sequence_lengths[lane] != len(record[4]):
                    sample_warning.append('Unequal length of index sequence in lane.')
            other_seqindices = seqindex_lookup.get(lane, set())
            # The lookup holds (seq index, SampleID) tuples, so the
            # duplicate check must compare against the sequence part;
            # testing the bare string for membership would never match.
            already_used = any(record[4] == used_seqindex
                               for used_seqindex, used_sampleid
                               in other_seqindices)
            if already_used:
                sample_warning.append('Index sequence already used in lane.')
            else:
                for other_seqindex, other_sampleid in other_seqindices:
                    ld = levenshtein_distance(record[4], other_seqindex,
                                              shortest=True)
                    if ld < MIN_LEVENSHTEIN_DISTANCE:
                        sample_warning.append('Too small Levenshtein distance'
                                              ' between this index sequence'
                                              ' and sample %s in lane.'
                                              % other_sampleid)
                        break
                    hd = hamming_distance(record[4], other_seqindex,
                                          shortest=True)
                    if hd < MIN_HAMMING_DISTANCE:
                        sample_warning.append('Too small Hamming distance'
                                              ' between this index sequence'
                                              ' and sample %s in lane.'
                                              % other_sampleid)
                        break
                seqindex_lookup.setdefault(lane, set()).add((record[4], record[2]))
            # Only flag an inconsistency when the SampleID actually
            # yields an index sequence.
            indexseq = interpret_sampleid_for_index(record[2], append_a)
            if indexseq and indexseq != record[4]:
                sample_warning.append('SampleID and index sequence inconsistent.')
        else:
            sample_warning.append('Missing sequence.')
        if sample_warning:
            problems.add(pos+1)
        sample_warning = B('<br>'.join(sample_warning), style='color: red;')
        # The abominable dot '.' in project identifiers is stored as
        # double underscore, since CASAVA cannot handle dot.
        # For display purposes, the dot is shown instead of double underscore.
        description = record[5].replace('__', '.')
        # The stored form (with double underscore) is what gets validated.
        if not PROJECTID_RX.match(record[5]):
            project_warning.append('Project ID is malformed')
        if project_warning:
            problems.add(pos+1)
        project_warning = B('<br>'.join(project_warning), style='color: red;')
        rows.append(TR(TD(str(pos+1)),
                       TD(record[0]),
                       TD(SELECT(name="lane%i" % pos, *lanes)),
                       TD(INPUT(type='text', name="sampleid%i" % pos,
                                value=record[2], size=24),
                          sample_warning),
                       TD(SELECT(name="sampleref%i" % pos, *samplerefs)),
                       TD(INPUT(type='text', name="index%i" % pos,
                                value=record[4], size=16)),
                       TD(INPUT(type='text', name="description%i" % pos,
                                value=description, size=24),
                          project_warning),
                       TD(INPUT(type='radio', name="control%i" % pos,
                                value='N', checked=record[6]=='N'), 'N ',
                          BR(),
                          INPUT(type='radio', name="control%i" % pos,
                                value='Y', checked=record[6]=='Y'), 'Y'),
                       TD(INPUT(type='text', name="recipe%i" % pos,
                                value=record[7], size=4)),
                       TD(INPUT(type='text', name="operator%i" % pos,
                                value=record[8], size=4))))
    # The blank row for a new record defaults to the lane and sample
    # reference of the last existing record, if there is one.
    try:
        previous_lane = samplesheet.records[-1][1]
        previous_sampleref = samplesheet.records[-1][3]
    except IndexError:
        previous_lane = None
        previous_sampleref = None
    lanes = []
    for i in xrange(1, 9):
        if i == previous_lane:
            lanes.append(OPTION(str(i), selected=True))
        else:
            lanes.append(OPTION(str(i)))
    # Does not depend on the lane number, so compute it once, outside
    # of the lane loop.
    samplerefs = _get_sampleref_options(previous_sampleref)
    rows.append(TR(TD(str(len(samplesheet.records)+1)),
                   TD(samplesheet.fcid),
                   TD(SELECT(name='lane', multiple=True, *lanes)),
                   TD(INPUT(type='text', name='sampleid', size=24)),
                   TD(SELECT(name='sampleref', *samplerefs)),
                   TD(INPUT(type='text', name='index', size=16)),
                   TD(INPUT(type='text', name='description', size=24)),
                   TD(INPUT(type='radio', checked=True,
                            name='control', value='N'), 'N ',
                      BR(),
                      INPUT(type='radio', name='control', value='Y'), 'Y'),
                   TD(INPUT(type='text', name='recipe', size=4)),
                   TD(INPUT(type='text', name='operator', size=4))))
    # Show the blank row first, then the records from last to first;
    # the header always goes on top.
    rows.reverse()
    rows.insert(0, header)
    table = TABLE(border=1, cellpadding=2, *rows)
    instructions = P(UL(LI('To add several records, cut-and-paste'
                           ' from the Google Docs spreadsheet'
                           ' into the text box to the right, then save.'),
                        LI('To add another record, fill in values'
                           ' in the first row, then save.'),
                        LI('To delete a record, set its SampleID'
                           ' to a blank character, then save.'),
                        LI('To modify a record, change the value'
                           ' in the field, then save.'),
                        LI('Offensive characters in Project Identifiers'
                           ' will be automatically converted'
                           ' to underscores.'),
                        LI('SampleID must look like ',
                           B('P123_456'), ', possibly with any of the'
                           ' characters B, C, D or F attached.'),
                        LI('If the index suffix looks like a nucleotide'
                           ' sequence with at least 6 bases, it will be used.'),
                        LI('Specify index number for the sample by adding the'
                           ' appropriate suffix using underscore, like so:',
                           TABLE(TR(TH('Index type'),
                                    TH('Standard index spec'),
                                    TH('Alternate short index spec')),
                                 TR(TD('Illumina'),
                                    TD('sampleid_index3'),
                                    TD('sampleid_i3')),
                                 TR(TD('Small RNA'),
                                    TD('sampleid_rpi6'),
                                    TD('sampleid_r6')),
                                 TR(TD('Agilent'),
                                    TD('sampleid_agilent14'),
                                    TD('sampleid_a14')),
                                 TR(TD('Mondrian'),
                                    TD('sampleid_mondrian11'),
                                    TD('sampleid_m11')),
                                 TR(TD('Haloplex'),
                                    TD('sampleid_halo11'),
                                    TD('sampleid_h11')),
                                 TR(TD('Haloplex HT 8-bp'),
                                    TD('sampleid_haloht31'),
                                    TD('sampleid_hht31')),
                                 TR(TD('SureSelect'),
                                    TD('sampleid_sureselect9'),
                                    TD('sampleid_ss9')),
                                 TR(TD('TruSeq DNA Dual HT'),
                                    TD('sampleid_dual13')),
                                 TR(TD('Nextera Dual HT'),
                                    TD('sampleid_nxdual15')),
                                 TR(TD('Halo HT Dual'),
                                    TD('sampleid_haloht15dual')),
                                 TR(TD('Illumina Dual'),
                                    TD('sampleid_index15dual')),
                                 TR(TD('Agilent SureSelect XT'),
                                    TD('sampleid_xtd04')),
                                 border=1,
                                 cellpadding=2))))
    ops = TABLE(TR(TD(FORM(I('Cut-and-paste 4 columns'
                             ' (Lane, Sample, Project, Ref.genome).'),
                           TEXTAREA(name='cutandpaste', cols=40, rows=4),
                           INPUT(type='submit', value='Add'),
                           method='POST',
                           action=samplesheet.url))),
                TR(TD(FORM(INPUT(type='submit',
                                 value='Sort samplesheet records'),
                           INPUT(type='hidden',
                                 name='sort', value='default'),
                           method='POST',
                           action=samplesheet.url))),
                TR(TD(FORM(INPUT(type='submit',
                                 value='Delete this samplesheet',
                                 onclick="return confirm('Really delete?');"),
                           INPUT(type='hidden',
                                 name='http_method', value='DELETE'),
                           method='POST',
                           action=samplesheet.url))),
                width='100%')
    title = "%s (%s)" % (samplesheet,
                         A("CSV file", href=samplesheet.file_url))
    # Page-level messages: the optional transfer message and a summary
    # of all records that have at least one warning.
    warning = []
    if xfer_msg:
        warning.append(P(xfer_msg))
    if problems:
        problems = sorted(problems)
        problems = ', '.join(map(str, problems))
        warning.append(P("There are problems regarding records %s!" % problems))
    warning = DIV(style='color: red;', *warning)
    form = FORM(P(INPUT(type='submit', value='Save'),
                  ' Store the samplesheet. The pipeline computer (comicbookguy)'
                  ' will fetch it automatically within 15 minutes.'),
                P(table),
                method='POST',
                action=samplesheet.url)
    response['Content-Type'] = 'text/html'
    response.append(str(HTML(HEAD(TITLE(str(samplesheet))),
                             BODY(A('Home', href=get_url()),
                                  H1(title),
                                  TABLE(TR(TD(instructions),
                                           TD(ops))),
                                  warning,
                                  form))))