def get_interpretation_markers(text):
    """Return the first marker label found at the start of a line in ``text``.

    A marker is one of the following, immediately followed by a period
    (the period itself is suppressed from the result):

    * a run of decimal digits,
    * a run of the lowercase characters ``ivxlcdm``,
    * a run of uppercase ASCII letters.

    Returns the first token of the first match, or ``None`` when nothing
    in ``text`` matches.
    """
    digit_label = Word(string.digits)
    lower_roman_label = Word("ivxlcdm")
    upper_label = Word(string.ascii_uppercase)

    any_label = digit_label | lower_roman_label | upper_label
    marker_grammar = LineStart() + any_label + Suppress(".")

    # Only the first hit matters; scanString is lazy, so nothing past it
    # is scanned.
    first_hit = next(marker_grammar.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens[0]
def parsed_title(text, appendix_letter):
    """Return the parse results of the first title match in ``text``.

    Two alternatives are tried at the start of a line:

    * ``Marker(appendix_letter)``, a suppressed ``-``, a copy of
      ``grammar.a1`` with ``leaveWhitespace()`` applied, then three optional
      pieces in order: ``grammar.markerless_upper``, either
      ``grammar.paren_upper`` or ``grammar.paren_lower``, and
      ``grammar.paren_digit``;
    * ``Marker("part")`` followed by ``grammar.aI``.

    Returns the tokens of the first match, or ``None`` when there is none.
    """
    # Built step by step, left to right, so the resulting expression has the
    # same shape as a single chained ``a + b + c + ...``.
    appendix_form = Marker(appendix_letter) + Suppress('-')
    appendix_form = appendix_form + grammar.a1.copy().leaveWhitespace()
    appendix_form = appendix_form + Optional(grammar.markerless_upper)
    appendix_form = appendix_form + Optional(
        grammar.paren_upper | grammar.paren_lower)
    appendix_form = appendix_form + Optional(grammar.paren_digit)

    part_form = Marker("part") + grammar.aI
    title_parser = LineStart() + (appendix_form | part_form)

    first_hit = next(title_parser.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens
# Example no. 3
0
def parsed_title(text, appendix_letter):
    """Return the parse results of the first title match in ``text``.

    Two alternatives are tried at the start of a line:

    * ``Marker(appendix_letter)``, a suppressed ``-``, a copy of
      ``grammar.a1`` with ``leaveWhitespace()`` applied, then three optional
      pieces in order: ``grammar.markerless_upper``, either
      ``grammar.paren_upper`` or ``grammar.paren_lower``, and
      ``grammar.paren_digit``;
    * ``Marker("part")`` followed by ``grammar.aI``.

    Returns the tokens of the first match, or ``None`` when there is none.
    """
    # Built step by step, left to right, so the resulting expression has the
    # same shape as a single chained ``a + b + c + ...``.
    appendix_form = Marker(appendix_letter) + Suppress('-')
    appendix_form = appendix_form + grammar.a1.copy().leaveWhitespace()
    appendix_form = appendix_form + Optional(grammar.markerless_upper)
    appendix_form = appendix_form + Optional(
        grammar.paren_upper | grammar.paren_lower)
    appendix_form = appendix_form + Optional(grammar.paren_digit)

    part_form = Marker("part") + grammar.aI
    title_parser = LineStart() + (appendix_form | part_form)

    first_hit = next(title_parser.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens
def mwgtofasta(wiki, evt):
    """Rewrite primer listings in the editor selection as FASTA records.

    The selected text is scanned for entries consisting of a name at the
    start of a line followed, on a later line, by a sequence enclosed between
    ``5'`` and ``3'``. Each entry becomes a record of the form
    ``>{number}_{name} ({length}-mer)`` followed by the sequence with
    newlines and spaces removed and leading/trailing ``-`` stripped.

    Numbering: if the selection already contains a header of the form
    ``>N_``, the first such header sets the base, so the last new record is
    numbered ``N + 1`` and earlier records count upward from there. With no
    existing header, the last record is numbered 1.

    The generated text replaces the part of the selection that precedes the
    first existing header (or the whole selection if there is none), and the
    new text is left selected.

    Args:
        wiki: Object whose ``getActiveEditor()`` returns an editor providing
            ``GetSelectedText``, ``GetSelection``, ``SetSelectionByCharPos``
            and ``ReplaceSelection``.
        evt: Unused; kept so the signature stays unchanged for callers.
    """
    from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine, nums

    editor = wiki.getActiveEditor()
    raw_string = editor.GetSelectedText()
    start, end = editor.GetSelection()

    fastaheader = Combine(
        Literal(">").suppress() + Word(nums).setResultsName("number") +
        Literal("_").suppress())
    try:
        # The builtin next() works on Python 2.6+ and Python 3; the
        # generator method .next() exists only on Python 2.
        data, dataStart, dataEnd = next(fastaheader.scanString(raw_string))
    except StopIteration:
        # No existing header: start numbering at 1 and replace the whole
        # selection.
        number = 1
        dataStart = end - start
    else:
        number = int(data.number) + 1

    editor.SetSelectionByCharPos(start, start + dataStart)

    name = Word(printables).setResultsName("name")
    seq_start = Literal("5'").suppress()
    seq_stop = Literal("3'").suppress()
    sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

    # Distinct loop names: on Python 2 list-comprehension variables leak into
    # the enclosing scope and would otherwise overwrite data/dataStart.
    seqlist = [tokens for tokens, _, _ in mwg_primer.scanString(raw_string)]

    # Records are numbered in descending order, so start one past the top.
    number += len(seqlist)

    records = []
    for entry in seqlist:
        number -= 1
        s = entry.seq.strip("-").replace("\n", "").replace(" ", "")
        records.append(">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number, name=entry.name, length=len(s), seq=s))
    fasta_string = "".join(records)

    editor.ReplaceSelection(fasta_string)
    editor.SetSelectionByCharPos(start, start + len(fasta_string))
# Example no. 5
0
    # NOTE(review): this is the interior of a function whose ``def`` line is
    # not part of this excerpt; ``regex``, ``text``, ``raw_string`` and
    # ``number`` must be defined by the missing part.
    match = re.findall(regex, text)

    # Write the first two items of every match to alunos.txt, one pair per
    # line (presumably ``regex`` has at least two groups -- confirm).
    # NOTE(review): the line is encoded to bytes before being written to a
    # text-mode file; that works on Python 2 but raises TypeError on Python 3.
    with open("alunos.txt", "w") as f:
        for m in match:
            f.write(u"{}   {}\n".format(m[0], m[1]).encode("utf8"))

    from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine

    # Grammar for one primer entry: a name at the start of a line, a skip
    # ahead to a following line start, then the text between 5' and 3'.
    name = Word(printables).setResultsName("name")
    seq_start = Literal("5'").suppress()
    seq_stop = Literal("3'").suppress()
    sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

    result = mwg_primer.scanString(raw_string)

    # Keep only the parsed tokens of each hit; the positions are discarded.
    seqlist = [data for data, dataStart, dataEnd in result]

    # Records are numbered in descending order: raise the counter by the
    # number of entries, then decrement it before formatting each one.
    number += len(seqlist)

    fasta_string = ''

    for data in seqlist:
        number -= 1
        # Strip leading/trailing dashes, then drop newlines and spaces.
        s = data.seq.strip("-").replace("\n", "").replace(" ", "")
        fasta_string += ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number, name=data.name, length=len(s), seq=s)

    # NOTE(review): bare expression statement -- it has no effect as written;
    # possibly a ``return`` or ``print`` was intended (confirm).
    fasta_string
# Example no. 6
0






    # NOTE(review): this is the interior of a function whose ``def`` line is
    # not part of this excerpt; ``raw_string`` and ``number`` must be defined
    # by the missing part.
    from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine

    # Grammar for one primer entry: a name at the start of a line, a skip
    # ahead to a following line start, then the text between 5' and 3'.
    name        =  Word(printables).setResultsName("name")
    seq_start   =  Literal("5'").suppress()
    seq_stop    =  Literal("3'").suppress()
    sequence    =  Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer  =  LineStart() + name + SkipTo(LineStart()) + sequence

    result = mwg_primer.scanString(raw_string)

    # Keep only the parsed tokens of each hit; the positions are discarded.
    seqlist = [data for data,dataStart,dataEnd in result]

    # Records are numbered in descending order: raise the counter by the
    # number of entries, then decrement it before formatting each one.
    number+=len(seqlist)

    fasta_string = ''

    for data in seqlist:
        number-=1
        # Strip leading/trailing dashes, then drop newlines and spaces.
        s=data.seq.strip("-").replace("\n","").replace(" ","")
        fasta_string+=">{number}_{name} ({length}-mer)\n{seq}\n\n".format(number=number,name=data.name,length=len(s),seq=s)

    # NOTE(review): bare expression statement -- it has no effect as written;
    # possibly a ``return`` or ``print`` was intended (confirm).
    fasta_string