def get_interpretation_markers(text):
    """Return the first marker token found while scanning ``text``.

    A marker is one of the following, anchored with ``LineStart()`` and
    followed by a period (the period is suppressed from the result):

    * a run of ASCII digits,
    * a run of the lowercase letters ``ivxlcdm``,
    * a run of uppercase ASCII letters.

    Returns the first token of the first match, or ``None`` when the scan
    yields nothing.
    """
    digits = Word(string.digits)
    lower_roman = Word("ivxlcdm")
    upper_alpha = Word(string.ascii_uppercase)
    # Alternatives are tried in this order: digits, lowercase roman, uppercase.
    marker = LineStart() + (digits | lower_roman | upper_alpha) + Suppress(".")

    # Only the first hit matters; the remaining matches are never consumed.
    first_hit = next(marker.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens[0]
def parsed_title(text, appendix_letter):
    """Return the first title match found while scanning ``text``.

    Two alternative shapes are tried, both anchored with ``LineStart()``:

    * ``Marker(appendix_letter)``, a suppressed ``-``, a copy of
      ``grammar.a1`` with ``leaveWhitespace()`` applied, then (each
      optional) ``grammar.markerless_upper``, one of
      ``grammar.paren_upper`` / ``grammar.paren_lower``, and
      ``grammar.paren_digit``;
    * ``Marker("part")`` followed by ``grammar.aI``.

    Returns the parsed tokens of the first match, or ``None`` when the scan
    yields nothing.
    """
    # Build the "<letter>-<digits>..." form step by step, keeping the same
    # left-to-right association as a single chained ``+`` expression.
    letter_dash = Marker(appendix_letter) + Suppress('-')
    with_digits = letter_dash + grammar.a1.copy().leaveWhitespace()
    with_upper = with_digits + Optional(grammar.markerless_upper)
    with_paren_letter = with_upper + Optional(
        grammar.paren_upper | grammar.paren_lower)
    appendix_section = with_paren_letter + Optional(grammar.paren_digit)

    part_section = Marker("part") + grammar.aI

    title_parser = LineStart() + (appendix_section | part_section)

    # Only the first hit matters; the remaining matches are never consumed.
    first_hit = next(title_parser.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens
def parsed_title(text, appendix_letter):
    """Return the first title match found while scanning ``text``.

    Two alternative shapes are tried, both anchored with ``LineStart()``:

    * ``Marker(appendix_letter)``, a suppressed ``-``, a copy of
      ``grammar.a1`` with ``leaveWhitespace()`` applied, then (each
      optional) ``grammar.markerless_upper``, one of
      ``grammar.paren_upper`` / ``grammar.paren_lower``, and
      ``grammar.paren_digit``;
    * ``Marker("part")`` followed by ``grammar.aI``.

    Returns the parsed tokens of the first match, or ``None`` when the scan
    yields nothing.
    """
    # Build the "<letter>-<digits>..." form step by step, keeping the same
    # left-to-right association as a single chained ``+`` expression.
    letter_dash = Marker(appendix_letter) + Suppress('-')
    with_digits = letter_dash + grammar.a1.copy().leaveWhitespace()
    with_upper = with_digits + Optional(grammar.markerless_upper)
    with_paren_letter = with_upper + Optional(
        grammar.paren_upper | grammar.paren_lower)
    appendix_section = with_paren_letter + Optional(grammar.paren_digit)

    part_section = Marker("part") + grammar.aI

    title_parser = LineStart() + (appendix_section | part_section)

    # Only the first hit matters; the remaining matches are never consumed.
    first_hit = next(title_parser.scanString(text), None)
    if first_hit is None:
        return None
    tokens, _start, _end = first_hit
    return tokens
def mwgtofasta(wiki, evt):
    """Replace the primer listing in the editor selection with FASTA records.

    The selected text is scanned for primer entries of the form
    ``<name> ... 5'<sequence>3'`` and each one is rewritten as::

        >{number}_{name} ({length}-mer)
        {sequence}

    Numbering continues from an existing ``>N_`` FASTA header in the
    selection when one is present (the new records start at ``N + 1``);
    otherwise it starts at 1. Records are numbered in descending order, so
    the first primer found gets the highest number.

    When a FASTA header is present, only the text *before* it is replaced,
    leaving the existing records in place. After the replacement the newly
    inserted text is selected.

    Args:
        wiki: object exposing ``getActiveEditor()``; the returned editor must
            provide ``GetSelectedText``, ``GetSelection``,
            ``SetSelectionByCharPos`` and ``ReplaceSelection``.
        evt: unused by this function.
    """
    from pyparsing import (
        Word, Literal, printables, LineStart, SkipTo, Combine, nums)

    # Look the editor up once instead of on every call.
    editor = wiki.getActiveEditor()
    raw_string = editor.GetSelectedText()
    start, end = editor.GetSelection()

    fastaheader = Combine(
        Literal(">").suppress()
        + Word(nums).setResultsName("number")
        + Literal("_").suppress())
    try:
        # Builtin next() works on Python 2.6+ and 3; the generator's
        # ``.next()`` method exists only on Python 2.
        data, dataStart, dataEnd = next(fastaheader.scanString(raw_string))
    except StopIteration:
        # No existing FASTA header: start numbering at 1 and replace the
        # whole selection.
        number = 1
        dataStart = end - start
    else:
        number = int(data.number) + 1

    # Narrow the selection to the text preceding the first FASTA header.
    editor.SetSelectionByCharPos(start, start + dataStart)

    name = Word(printables).setResultsName("name")
    seq_start = Literal("5'").suppress()
    seq_stop = Literal("3'").suppress()
    sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

    seqlist = [tokens for tokens, _, _ in mwg_primer.scanString(raw_string)]

    # Count down so the first primer receives the highest number.
    number += len(seqlist)
    records = []
    for data in seqlist:
        number -= 1
        # Drop surrounding dashes and any embedded newlines/spaces.
        s = data.seq.strip("-").replace("\n", "").replace(" ", "")
        records.append(
            ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
                number=number, name=data.name, length=len(s), seq=s))
    fasta_string = "".join(records)

    editor.ReplaceSelection(fasta_string)
    editor.SetSelectionByCharPos(start, start + len(fasta_string))
import io

from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine

# --- Part 1: dump regex matches to a text file -----------------------------
# ``regex`` and ``text`` are expected to be defined earlier in the script.
# Each match is indexed as m[0] / m[1], so ``regex`` presumably has at least
# two capture groups -- verify against its definition.
match = re.findall(regex, text)

# io.open with an explicit encoding accepts text on both Python 2 and 3.
# Passing UTF-8 *bytes* to a file opened in text mode "w" raises TypeError
# on Python 3, so the text is written directly and encoded by the file.
with io.open("alunos.txt", "w", encoding="utf8") as f:
    for m in match:
        f.write(u"{} {}\n".format(m[0], m[1]))

# --- Part 2: convert primer entries to FASTA -------------------------------
# ``raw_string`` (input text) and ``number`` (first record number) are
# expected to be defined earlier in the script.
name = Word(printables).setResultsName("name")
seq_start = Literal("5'").suppress()
seq_stop = Literal("3'").suppress()
sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

result = mwg_primer.scanString(raw_string)
seqlist = [data for data, dataStart, dataEnd in result]

# Count down so the first primer receives the highest number.
number += len(seqlist)
fasta_records = []
for data in seqlist:
    number -= 1
    # Drop surrounding dashes and any embedded newlines/spaces.
    s = data.seq.strip("-").replace("\n", "").replace(" ", "")
    fasta_records.append(
        ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number, name=data.name, length=len(s), seq=s))
fasta_string = "".join(fasta_records)

# Bare expression kept from the original; presumably there to echo the value
# in an interactive session.
fasta_string
from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine

# Convert primer entries of the form "<name> ... 5'<sequence>3'" found in
# ``raw_string`` into FASTA records. ``raw_string`` and ``number`` are
# expected to be defined earlier in the script.
name = Word(printables).setResultsName("name")
seq_start = Literal("5'").suppress()
seq_stop = Literal("3'").suppress()
sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

result = mwg_primer.scanString(raw_string)
seqlist = []
for data, dataStart, dataEnd in result:
    seqlist.append(data)

# Count down so the first primer receives the highest number.
number += len(seqlist)
fasta_records = []
for data in seqlist:
    number -= 1
    # Drop surrounding dashes and any embedded newlines/spaces.
    s = data.seq.strip("-").replace("\n", "").replace(" ", "")
    fasta_records.append(
        ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number,
            name=data.name,
            length=len(s),
            seq=s,
        )
    )
fasta_string = ''.join(fasta_records)

# Bare expression kept from the original; presumably there to echo the value
# in an interactive session.
fasta_string