def digit2word(key):
    """Spell out a numeric token (Arabic digits or a Roman numeral) in words.

    ' U.S.', commas and full stops are stripped first.  A token made only of
    zeros (or an empty token) yields 'zero' and '-0' yields 'minus zero'.
    A trailing "'s" or "s" is removed, the remainder is converted
    recursively, and "'s" is appended to the result.  Roman numerals
    (with 'IIII' read as 'IV') are converted to their decimal value before
    being spelled out.

    :param key: the token to convert
    :return: the lower-cased wording, or the cleaned token itself when no
        conversion succeeds
    """
    key = key.replace(' U.S.', '').replace(',', '').replace('.', '').strip()
    if key.replace('0', '') == '':
        return 'zero'
    if key == '-0':
        return 'minus zero'

    # Best effort: possessive/plural suffix first, then the whole token as a
    # Roman numeral.  Failures leave ``key`` untouched.
    try:
        if key.endswith("'s"):
            return digit2word(key[:-2]) + "'s"
        if key.endswith("s"):
            return digit2word(key[:-1]) + "'s"
        key = str(roman.fromRoman(key.replace('IIII', 'IV')))
    except Exception:
        pass

    # Second chance: only the first whitespace-separated word is a numeral.
    try:
        key = str(roman.fromRoman(key.split()[0].replace('IIII', 'IV')))
    except Exception:
        pass

    try:
        text = p.number_to_words(key, decimal='point', andword='', zero='o')
        # NOTE(review): every '.' was removed from ``key`` above, so these two
        # decimal-point adjustments look unreachable -- confirm before removing.
        if re.match(r'^0\.', key):
            text = 'zero ' + text[2:]
        if re.match(r'.*\.0$', key):
            text = text[:-2] + ' zero'
        text = text.replace('-', ' ').replace(',', '')
        return text.lower()
    except Exception:
        return key
def testFromRomanCase():
    """fromRoman accepts upper-case numerals and rejects lower-case ones."""
    for number in range(1, 5000):
        generated = roman.toRoman(number)
        # The upper-case form must parse without raising.
        roman.fromRoman(generated.upper())
        lowered = generated.lower()
        with pytest.raises(roman.InvalidRomanNumeralError):
            roman.fromRoman(lowered)
def roman2dec() -> int:
    """
    Entry point for `se roman2dec`

    Converts every numeral read from a non-interactive stdin, followed by
    every numeral given on the command line, and prints the decimal values.
    Returns 0 on success, or the invalid-input exit code at the first
    numeral that cannot be parsed.
    """

    import roman

    parser = argparse.ArgumentParser(description="Convert a Roman numeral to a decimal number.")
    parser.add_argument("-n", "--no-newline", dest="newline", action="store_false", help="don’t end output with a newline")
    parser.add_argument("numbers", metavar="NUMERAL", nargs="+", help="a Roman numeral")
    args = parser.parse_args()

    # Piped input comes first, command-line arguments afterwards.
    numerals = []
    if not sys.stdin.isatty():
        numerals.extend(piped.rstrip("\n") for piped in sys.stdin)
    numerals.extend(args.numbers)

    terminator = "\n" if args.newline else ""
    for numeral in numerals:
        try:
            print(roman.fromRoman(numeral.upper()), end=terminator)
        except roman.InvalidRomanNumeralError:
            se.print_error("Not a Roman numeral: {}".format(numeral))
            return se.InvalidInputException.code

    return 0
def testFromRomanCase(self):
    """fromRoman should only accept uppercase input (checked for 1..3999)."""
    for value in range(1, 4000):
        text = roman.toRoman(value)
        # Upper-case input must be accepted silently ...
        roman.fromRoman(text.upper())
        # ... while the lower-case spelling must be rejected.
        with self.assertRaises(roman.InvalidRomanNumeralError):
            roman.fromRoman(text.lower())
def infer_production_century(br, de):
    """
    Estimate the century in which an author was productive.

    With year values: the result is the birth century, or the following one
    when the author was born in year 80 or later of a century (the death
    year is then asserted to fall in that following century).
    With Roman-numeral centuries: the shared century when both agree,
    otherwise the half-way value (e.g. birth XVI, death XVII => 16.5); the
    death century is asserted to be exactly one more than the birth century.
    Returns False when either value is '0' or is not a valid Roman numeral.
    """
    if '0' in (str(br), str(de)):
        return False

    try:
        byear, dyear = int(br), int(de)
    except ValueError:
        # Not plain years: interpret both values as Roman-numeral centuries.
        try:
            byear = roman.fromRoman(br)
            dyear = roman.fromRoman(de)
        except roman.InvalidRomanNumeralError:
            return False
        if byear == dyear:
            return byear
        assert dyear == byear + 1
        return byear + 0.5

    birth_century_index = floor(byear / 100)
    if byear % 100 < 80:
        return birth_century_index + 1

    pcen = birth_century_index + 2
    assert floor(dyear / 100) + 1 == pcen
    return pcen
def testFromRomanCase(self):
    """fromRoman should only accept uppercase input"""
    for n in range(1, 5000):
        as_roman = roman.toRoman(n)
        upper_case, lower_case = as_roman.upper(), as_roman.lower()
        # Must not raise for the upper-case spelling.
        roman.fromRoman(upper_case)
        self.assertRaises(roman.InvalidRomanNumeralError,
                          roman.fromRoman, lower_case)
def is_roman_number(roman_as_str):
    """
    Tell whether ``roman_as_str`` parses as a Roman numeral.

    Returns True when fromRoman() accepts the value and False when it raises
    InvalidRomanNumeralError; any other exception propagates.
    """
    try:
        fromRoman(roman_as_str)
    except InvalidRomanNumeralError:
        valid = False
    else:
        valid = True
    return valid
def roman_magic(self, v1, op, v2):
    """Apply the operator text ``op`` to two Roman numerals; return a Roman numeral.

    Both operands are converted to decimal, joined around ``op`` into an
    expression string, evaluated, and the result converted back with toRoman().
    """
    left = str(fromRoman(v1))
    right = str(fromRoman(v2))
    # NOTE(review): ``op`` is spliced into a string passed to eval(); this is
    # only safe if callers restrict it to a known set of operators -- confirm.
    expression = "".join((left, op, right))
    return toRoman(eval(expression))
def testFromRomanCase(self):
    """fromRoman should only accept uppercase input"""
    for candidate in range(1, 5000):
        numeral = roman.toRoman(candidate)
        # Nothing is assumed about the case toRoman() produces, so the
        # numeral is upper-cased explicitly; this call doubles as the
        # implicit check that upper-case input does NOT raise.
        roman.fromRoman(numeral.upper())
        rejected = numeral.lower()
        self.assertRaises(
            roman.InvalidRomanNumeralError,
            roman.fromRoman,
            rejected,
        )
def is_roman(goo):
    """Return True if roman.fromRoman() accepts ``goo``, otherwise False."""
    try:
        roman.fromRoman(goo)
    except roman.InvalidRomanNumeralError:
        return False
    return True
def isRoman(txt):
    """Helper: report whether ``txt`` is a valid Roman numeral.

    Relies on the pip module roman: https://pypi.python.org/pypi/roman
    """
    try:
        roman.fromRoman(txt)
    except roman.InvalidRomanNumeralError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
def replace_chapter_number(match):
    """Build a 'Chapitre N.' heading from the numeral captured in group 1.

    The capture is converted from Roman to decimal.  When that fails and the
    capture starts with 'L', the text after that first character is converted
    instead (a failure of this second attempt propagates); any other invalid
    capture is kept verbatim.
    """
    captured = match.group(1)
    try:
        chapter = str(roman.fromRoman(captured))
    except roman.InvalidRomanNumeralError:
        chapter = captured
        if captured.startswith('L'):
            chapter = str(roman.fromRoman(captured[1:]))
    return f'Chapitre {chapter}.'
def start_end(self):
    """Return the (start, end) page numbers encoded in ``self.name``.

    The name is matched against PAGE_ORDER_RX.  Returns None when it does not
    match.  When the "start" group is all digits both groups are read as
    decimal integers, otherwise both are read as Roman numerals.
    """
    match = PAGE_ORDER_RX.match(self.name)
    if match is None:
        return None

    first = match.group("start")
    last = match.group("end")
    if first.isdigit():
        return int(first), int(last)
    return roman.fromRoman(first), roman.fromRoman(last)
def is_roman(text: str) -> bool:
    """Check whether the given string is a Roman number.

    The text is stripped and upper-cased, and every character outside A-Z is
    removed before it is handed to fromRoman().  Returns True when the
    conversion succeeds and False when it raises NoRoman.
    """
    letters_only = re.sub(r'[^A-Z]', "", text.strip().upper())
    try:
        fromRoman(letters_only)
    except NoRoman:
        return False
    return True
def getMinimalFormOfNumeral(numeral):
    """Re-encode a Roman numeral by evaluating it symbol by symbol.

    Each symbol is subtracted when it is smaller than the symbol that follows
    it and added otherwise; the final symbol is always added.  The total is
    converted back with roman.toRoman().
    """
    total = 0
    for symbol, following in zip(numeral, numeral[1:]):
        symbol_value = roman.fromRoman(symbol)
        if symbol_value < roman.fromRoman(following):
            total -= symbol_value
        else:
            total += symbol_value
    # The last symbol has no successor and therefore always counts positively.
    total += roman.fromRoman(numeral[-1])
    return roman.toRoman(total)
def convertToNum(input_type, s):
    """Convert the text ``s`` to a number according to ``input_type``.

    "R": Roman numeral, taken as written; "r": Roman numeral, upper-cased
    first; "a"/"A": passed lower-cased to fromCharString(); "i"/"f": parsed
    with float().  Any other type code yields 0.
    """
    if input_type == "R":
        return roman.fromRoman(s)
    if input_type == "r":
        return roman.fromRoman(s.upper())
    if input_type in ("a", "A"):
        return fromCharString(s.lower())
    if input_type in ("i", "f"):
        return float(s)
    return 0
def replace_roman_numerals_in_match(m):
    """Rewrite the Roman numeral captured in group 3 of ``m`` as a decimal.

    :param m: a regex match object; group 1 is the text kept before the
        numeral, group 3 the numeral, group 8 (when non-empty) a part that is
        re-attached after a colon, and group 7 the trailing text used
        otherwise
    :return: the rewritten text, or the whole original match when group 3 is
        empty or cannot be converted
    """
    numeral = m.group(3).upper()
    if not numeral:
        return m.group(0)
    try:
        value = roman.fromRoman(numeral)
        if m.group(8):
            return u"{}{}:{}".format(m.group(1), value, m.group(8))
        return u"{}{}{}".format(m.group(1), value, m.group(7))
    except Exception:
        # Best effort: leave the text untouched when conversion fails.
        # (Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.)
        return m.group(0)
def romanDigitRangeCorrector(cite_in):
    """Convert a Roman numeral, or a dash-separated pair of them, to decimal text.

    A citation containing a hyphen, en dash or em dash is split on the dashes
    and its first two parts are converted and joined with a plain hyphen.
    Without a dash the whole citation is converted.  Input is upper-cased
    before conversion.
    """
    dash_expression = re.compile('[-–—]')
    if not dash_expression.search(cite_in):
        return str(roman.fromRoman(cite_in.upper()))

    pieces = re.split(dash_expression, cite_in)
    low = str(roman.fromRoman(pieces[0].upper()))
    high = str(roman.fromRoman(pieces[1].upper()))
    return low + '-' + high
def normalize_string(self, input_string):
    """
    Normalise a camera model string so the Canon "Mark" spellings agree.

    As we have several variations of the Canon Mark models we have to clean it up
    1) break down the input string into tokens
    2) check if one of those tokens is a variation of "Mark" (i.e. mark, mk, MarkIII, 5dmark, etc)
    3) check if the mark model number is in roman and convert it to INT
    4) eliminating spaces and lowering case

    :param input_string: the raw model text; it is split on whitespace
    :return: the normalised tokens joined together, lower-cased, with all
        spaces removed
    """
    tokens = [t for t in input_string.split()]
    normalized_tokens = []
    # Group 1: "mk"/"mark" plus optional whitespace; group 2: the model
    # number written with digits and/or the letters i, v, x.
    lookup_pattern = re.compile("(m(?:ar)?k\s*)([ivx\d]*)", flags=re.IGNORECASE)
    # presumably the compiled whole-string numeral pattern shipped with the
    # roman package -- verify against the installed version
    roman_nums = roman.romanNumeralPattern
    # NOTE(review): the one-line source lost its indentation; the nesting
    # below is the most plausible reading -- confirm against the original.
    for i, token in enumerate(tokens, 0):
        # check if the token is a variation of mark
        lookup = lookup_pattern.search(token)
        if lookup:
            # some strings have the 5d concatenated to the model, let's separate it
            if re.search('5d', token, re.IGNORECASE):
                token = '5D Mark'
            else:
                token = 'Mark'
            model_nbr = lookup.group(2)
            # check if the model number is part of the same token and convert it to INT
            if model_nbr != '':
                if model_nbr.isdigit():
                    pass
                else:
                    model_nbr = roman.fromRoman(model_nbr.upper())
                token = token + ' ' + str(model_nbr)
        # check if the token is a roman numeral and convert it to INT
        is_roman = roman_nums.search(token)
        if is_roman:
            token = str(roman.fromRoman(token))
        normalized_tokens.append(token)
    # convert the list of normalized tokens into a single string and normalize it
    output_string = ' '.join(normalized_tokens)
    output_string = output_string.strip()
    output_string = output_string.lower()
    output_string = output_string.replace(" ", "")
    return output_string
def onLine(l):
    """Consume one line of the play text and update the module-level parser state.

    Act and scene headings update ``inAct`` / ``inScene`` (their numbers are
    read as Roman numerals), a character heading starts a new speech, and any
    other line seen while inside the first block of a speech is counted and
    recorded in ``PLAY_LINES``.

    :param l: one raw line of text; trailing whitespace is stripped
    :return: None -- all results are stored in the globals declared below
    """
    global PLAY
    global PLAY_LINES
    global line
    global inAct
    global inScene
    global inCharacter
    global currCharacter
    global firstBlock
    l = l.rstrip()
    # Act heading: the second word is the act number.
    if isSection(l, START_ACT):
        inAct = roman.fromRoman(l.split()[1])
        if DEBUG:
            print('Act', inAct)
        createIfEmpty(PLAY, inAct)
        inScene = False
        inCharacter = False
        return
    # Scene heading: the second word minus its final character is the number
    # (presumably a trailing punctuation mark -- verify against the input text).
    if isSection(l, START_SCENE):
        inScene = roman.fromRoman(l.split()[1][:-1])
        if DEBUG:
            print('Scene', inScene)
        createIfEmpty(PLAY[inAct], inScene)
        PLAY[inAct][inScene] = []
        inCharacter = False
        line = 0
        return
    # Character heading: the character just after the START_CHARACTER prefix
    # must not be a space and every word must start with an upper-case letter.
    if isSection(l, START_CHARACTER) and \
            l[len(START_CHARACTER) + 1] != ' ' and \
            len([word for word in l.split() if word[0].isupper()]) == len(l.split()):
        flushCharacter()
        inCharacter = l.strip().upper()
        currCharacter = ''
        firstBlock = True
        if DEBUG:
            print('Character', inCharacter)
        return
    if inAct and inScene and inCharacter and firstBlock:
        # NOTE(review): indentation was lost in the one-line source; this
        # reading treats a blank line as never being counted and as ending
        # the first block once some text was collected -- confirm.
        if len(l.strip()) == 0 and firstBlock:
            if len(currCharacter) > 0:
                firstBlock = False
            return
        line += 1
        # Key: act, scene, character name and upper-cased line text, concatenated.
        PLAY_LINES[''.join(
            [str(inAct), str(inScene), inCharacter, l.strip().upper()])] = line
        currCharacter += l + '\n'
def _ordinal2word(wordsList, indice):
    """Convert a French ordinal number to a written word.

    i.e. 1er --> premier

    param wordsList: the list of words being processed
    param indice: position in wordsList of the utf-8 ordinal number
    return a 'written' ordinal number; the normalized number itself when it
           is neither a cardinal nor a Roman number
    """
    strNumber = NumberFormula._normalizeNumber(wordsList[indice])

    # The feminine form of "first" is handled as a special case.
    if strNumber.encode('utf-8') == u"1ère".encode('utf-8'):
        return u"première"

    # Drop the ordinal suffix letters to isolate the number itself.
    bareNumber = re.sub(u"[erèm]", "", strNumber)

    if NumberFormula._isCardinalNumber(bareNumber):
        written = num2words(int(bareNumber), ordinal=True, lang='fr')
    elif NumberFormula._isRomanNumber(bareNumber):
        # Roman to cardinal, then digits to ordinal
        cardinalNumber = fromRoman(bareNumber.encode('utf-8'))
        written = num2words(cardinalNumber, ordinal=True, lang='fr')
    else:
        written = strNumber

    # Remove the plural "s" left in front of the ordinal ending.
    for plural, singular in ((r'vingtsi', 'vingti'),
                             (r'centsi', 'centi'),
                             (r'millionsi', 'millioni'),
                             (r'milliardsi', 'milliardi')):
        written = re.sub(plural, singular, written)

    return written
def get_orf_data(chromo, start, end, orf_annotation = None):
    """Return the gene features overlapping ``start``..``end`` on one chromosome.

    :param chromo: chromosome number, compared against the decimal value of
        the Roman numeral that follows the first three characters of the
        GFF chromosome name
    :param start: first position of the region of interest
    :param end: last position of the region of interest
    :param orf_annotation: path to a tab-separated GFF file; the program
        exits with a message when it is None
    :return: DataFrame with columns chromo, start, end, strand, name
    """
    if orf_annotation is None:
        sys.exit("For ORF annotation, a GFF file needs to be provided. Such as this one: https://github.com/jianlingzhong/COMPETE_examples/blob/master/saccharomyces_cerevisiae.20080621.gff")
    genes = pd.read_csv(orf_annotation, sep = '\t', comment='#', header = None)
    # only need the gene features in the gff
    genes = genes.loc[genes.iloc[:, 2] == 'gene', ]
    # don't need Mito and 2-micron chromosome features
    genes = genes.loc[(genes.iloc[:, 0] != 'chrMito') & (genes.iloc[:, 0] != '2-micron')]
    # just need the following columns
    genes = genes.iloc[:, [0, 3,4,6,8]]
    genes.rename(columns={0:'chromo', 3:'start', 4:'end', 6:'strand', 8:'name'}, inplace = True)
    # convert 'chrI' to 1
    genes.chromo = genes.chromo.apply(lambda x: roman.fromRoman(x[3:]))
    genes = genes.loc[(genes.chromo == chromo)]
    # keep features that are neither entirely before nor entirely after the region
    selection = ~((genes.end < start) | (genes.start > end))
    genes = genes.loc[selection]
    # find_name is defined elsewhere; presumably it extracts the gene name
    # from the GFF attribute column -- verify
    genes.name = genes.name.apply(find_name)
    return genes
def _parse_stenogram_meta(self, response, meta_xs):
    """Collect the metadata of a stenogram into a dict.

    Always sets 'source' and '_id'.  Adds 'date', 'sitting_time',
    'sitting_no', 'session_no', 'session_season' and 'session' when the
    corresponding regular expression finds something in ``meta_xs``.
    """
    source = self._get_source(response.url, 'p_id')
    meta = {'source': source, '_id': source['id']}

    date_match = meta_xs.re(date_re)
    if date_match:
        # Captures are: year, month name, day.
        meta['date'] = date(int(date_match[0]),
                            month_names_map[date_match[1]],
                            int(date_match[2]))

    sitting_match = meta_xs.re(sitting_re)
    if sitting_match:
        meta['sitting_time'] = sitting_match[0].title()
        meta['sitting_no'] = sitting_match[1]
    else:
        # Fall back to the pattern that carries only the sitting number.
        sitting_no_match = meta_xs.re(sitting_no_re)
        if sitting_no_match:
            meta['sitting_no'] = sitting_no_match[0]

    session_match = meta_xs.re(session_re)
    if session_match:
        meta['session_no'] = roman.fromRoman(session_match[0])
        meta['session_season'] = session_match[1].title()
        if 'date' in meta:
            meta['session'] = '{} {}'.format(meta['date'].year,
                                             meta['session_season'])

    return meta
def sort_collections(t):
    """Sort key for a collection tuple, based on its first element.

    Names containing ": Chartae Latinae" get their final word (a Roman
    numeral) replaced by its decimal value zero-padded to four digits; every
    other name is returned unchanged.
    """
    label = t[0]
    if ": Chartae Latinae" not in label:
        return label

    *leading_words, volume = label.split()
    padded_volume = '{:04}'.format(roman.fromRoman(volume))
    return ' '.join(leading_words + [padded_volume])
def nameclean(orig):
    """Reduce a game title to a compact comparison key.

    Steps, in order: drop non-ASCII characters, upper-case and strip, expand
    'GOTY', remove a list of ignorable words, map 'one'/'two'/'40k'/'+' to
    fixed replacements, turn punctuation into spaces, convert stand-alone
    Roman numerals to decimal, drop a trailing 's' from every word, remove
    repeated words (keeping the first occurrence) and finally remove all
    whitespace.
    """
    ascii_only = orig.encode('ascii', errors='ignore').decode('ascii')
    name = ascii_only.upper().strip().replace('GOTY', 'GAME OF THE YEAR')

    ignoredphrases = [
        '\(', '\)', 'the', 'with', 'Early Access', 'bundle', 'and', '&',
        'in', 'vr', 'beta', 'Double Pack', 'Pack', 'Free to Play', 'Edition'
    ]
    for phrase in ignoredphrases:
        name = re.sub(r'\b{0}\b'.format(phrase), '', name, flags=re.IGNORECASE)

    # Fixed rewrites, applied in this exact order.
    rewrites = (
        (r'\bone\b', '1'),
        (r'\btwo\b', '2'),
        (r'\b40k\b', '40,000'),
        (r'\+', 'PLUS'),
        (r'[^\w\s]', ' '),
    )
    for pattern, replacement in rewrites:
        name = re.sub(pattern, replacement, name, flags=re.IGNORECASE)

    def _arabic(found):
        # Whitespace-only matches are dropped; otherwise the numeral is
        # converted while the surrounding separators are kept.
        if not found.group(0).strip():
            return ''
        converted = str(roman.fromRoman(found.group(2).strip()))
        return found.group(1) + converted + found.group(6)

    name = re.sub(
        r'(\A|\s)(\b(?=[MDCLXVI]+\b)M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\b)(\s|\Z)',
        _arabic, name, flags=re.IGNORECASE)
    name = re.sub(r'(\w*)s\b', r'\1', name, flags=re.IGNORECASE)

    # De-duplicate words while preserving first-occurrence order.
    unique_words = list(dict.fromkeys(name.split()))
    name = ' '.join(unique_words)
    return re.sub(r'\s+', '', name, flags=re.IGNORECASE).strip()
def setValues(siglum):
    '''
    Give every <num> element that lacks a @value attribute the arabic value
    of the Roman numeral it contains.

    The 'siglum' is the XML filename to parse ('g' for 'g.xml'; 'bonetti'
    for 'bonetti.xml' etc.).  Elements that convert are also tagged
    type="guessedvalue"; those that do not are reported and tagged
    type="foo".  The modified tree is written to another XML file
    ('numerals-g.xml'; 'numerals-bonetti.xml' etc.).
    '''
    n = myconst.ns
    tree = etree.parse('../xml/%s.xml' % siglum)

    for number in tree.findall('.//t:num', n):
        if number.get('value') is not None:
            # @value was already set: leave the element alone.
            continue

        content = getRomanContent(number, checkallnumbers=False)
        if content == '':
            print('foo', number.text)

        try:
            myvalue = roman.fromRoman(content.upper())
        except roman.InvalidRomanNumeralError:
            print('Numero romano non parsabile:', content.upper())
            number.set('type', 'foo')
        else:
            number.set('value', str(myvalue))
            number.set('type', 'guessedvalue')

    tree.write('../xml/numerals-%s.xml' % (siglum),
               encoding='UTF-8', method='xml',
               pretty_print=True, xml_declaration=True)
def token_features(y):
    """Classify a token for heading detection and return a feature code.

    "0": a table/figure keyword (Table, TABLE, Figure, FIGURE, Fig., FIG.)
    "1": a section number such as 3, 3.1 or 3.1.4 (optional trailing dot),
         or a token whose part before the first dot is a Roman numeral
    "2": any other token starting with an upper-case character
    "3": everything else

    :param y: the token; surrounding whitespace is ignored
    :return: the feature code as a one-character string
    """
    x = y.strip()
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."):
        return "0"

    pattern = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = pattern.match(x)
    # Only a match that covers the whole token counts as a section number.
    if m and m.span() == (0, len(x)):
        return "1"

    parts = x.split('.', 2)
    try:
        roman.fromRoman(parts[0].upper())
    except Exception:
        # Not a Roman numeral.  (Narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.)
        return "2" if x[0].isupper() else "3"
    return "1"
def sgd_gff2dataframe(input_annot_gff_filename, ok_features_list, include_dubious=False):
    """
    Load an SGD annotation GFF file into a dataframe.

    Keeps only the features whose type is in the input list and filters out
    mitochondrial and 2-micron genes.

    :param input_annot_gff_filename: path of the GFF file, handed to gff2dataframe()
    :param ok_features_list: feature types to keep (e.g. ['CDS'])
    :param include_dubious: when False, rows whose Note starts with 'Dubious'
        are dropped as well
    :return: the filtered dataframe with 'chrom' rewritten as 'chrNN',
        'SGD:' removed from 'dbxref' and 'start' converted with int()
    """
    gff_df = gff2dataframe(input_annot_gff_filename)
    gff_df['Name'] = gff_df['Name'].str.strip()
    #gff_df = gff_df[gff_df['feature'].isin(['gene','CDS']) ] # just coding sequence
    gff_df = gff_df[gff_df['feature'].isin(ok_features_list) ] # ['CDS']
    # filtering dubious and not in standard chromosomes
    if include_dubious:
        idx = ~gff_df['chrom'].isin(['chrMito', '2-micron'])
    else:
        idx = gff_df['Note'].apply(lambda x: not str(x).startswith('Dubious')) & ~gff_df['chrom'].isin(['chrMito', '2-micron'])
    gff_df = gff_df[idx]
    # fixing chromosome to be chrXX where XX is integer
    # (the Roman numeral after 'chr' becomes a zero-padded two-digit number)
    gff_df['chrom'] = gff_df['chrom'].apply(lambda x: 'chr' + str(fromRoman(str(x).replace('chr',''))).zfill(2) )
    # removing "SGD:" from the id column
    gff_df['dbxref'] = gff_df['dbxref'].apply(lambda x: str(x).replace('SGD:',''))
    gff_df['start'] = gff_df['start'].apply(int)
    return(gff_df)
def arabicOrRomanToInt(s):
    """Convert ``s`` to an int, reading it as decimal digits or as a Roman numeral.

    A string consisting only of digits is parsed with int(); anything else is
    handed to roman.fromRoman(), whose errors propagate.
    """
    if re.match(r'\d+$', s):
        return int(s)
    return roman.fromRoman(s)
def replace_ordinal(token):
    """10. Ordinal numbers: spell out an ordinal token.

    A trailing "'s" and then a trailing "s" are set aside (the last one found
    is re-attached to the result), ordinal markers and punctuation are
    removed, and the remainder is spelled out.  Roman numerals are prefixed
    with "the "; text that is neither digits nor a Roman numeral is returned
    as cleaned.
    """
    res = token.strip()

    ends = ""
    if res.endswith("'s"):
        ends, res = "'s", res[:-2]
    if res.endswith("s"):
        ends, res = "s", res[:-1]

    markers = (
        "th", "TH", "Th", 'st', 'ST', 'nd', 'ND', 'rd', 'RD',
        ".", "º", ",", "ª",
    )
    for marker in markers:
        res = res.replace(marker, "").strip()

    if res.isdigit():
        return numstr2word(float(res), ordinal=True) + ends

    # May be a Roman numeral.
    try:
        res = "the " + numstr2word(roman.fromRoman(res), ordinal=True)
    except roman.InvalidRomanNumeralError:
        pass
    return res + ends
def load():
    """Read the SNP frequency table and reshape it into count/depth columns.

    Builds ``cd``, a frame indexed by (CHROM, POS) whose columns form a
    (REP, GEN, READ) MultiIndex.

    NOTE(review): the function ends with a bare ``return``, so it returns None
    and ``cd`` is discarded -- confirm whether ``return cd`` was intended.
    NOTE(review): axis is passed positionally to pd.concat / sort_index / sum;
    check that the installed pandas version still accepts that.
    """
    a=pd.read_csv('/home/arya/storage/Data/Yeast/BurkeYeast/freq_tables/snps.txt',sep='\t').rename(columns={'chr':'CHROM','pos':'POS'}).set_index(['CHROM','POS']).iloc[:,2:]
    c=pd.Series(a.columns)
    # columns whose name starts with 'F'
    founders=a[c[c.apply(lambda x: x[0]=='F')]]
    # the result of this expression is not stored
    founders.loc[:,map(lambda x:'cov' in x,founders.columns)].sum(1)
    def toint(x):
        # int(x) when possible, otherwise x unchanged
        try:return int(x)
        except: return x
    # columns whose name starts with 'maf' or 'cov'
    cd=a[c[c.apply(lambda x: 'maf' ==x[:3]) |c.apply(lambda x: 'cov' ==x[:3])]]
    # split each column name on '_' into a tuple, renaming cov->D and maf->C
    cd.columns=map(lambda x:tuple(map(lambda y: toint(y.replace('cov','D').replace('maf','C')) ,x.split('_'))), cd.columns)
    cd.columns=pd.MultiIndex.from_tuples(cd.columns)
    # C becomes round(C * D) as int; D is kept as is
    cd=pd.concat([(cd['C']* cd['D']).round().astype(int),cd['D']],1,keys=['C','D']).reorder_levels([1,2,0],axis=1)
    cd.columns.names=['REP','GEN','READ']
    # columns whose name contains 'anc', converted the same way
    ancestors=a[c[c.apply(lambda x: 'anc' in x)]].rename(columns={'anc_maf':'C','anc_cov':'D'})
    ancestors['C']=(ancestors['C']*ancestors['D']).round().astype(int)
    ancestors=pd.concat([ancestors],1,keys=[0])
    R=cd.columns.levels[0]
    # repeat the ancestor columns once per entry of the first column level of cd
    ancestors=pd.concat([ancestors for _ in R],1,keys=R)
    ancestors.columns.names=['REP','GEN','READ']
    cd=pd.concat([cd,ancestors],1).sort_index(1).sort_index()
    # presumably maps sampling week to generation number -- verify
    weekToGen={0:0, 6:180, 12:360, 18:540}
    cd=cd.T.reset_index()
    cd.GEN=cd.GEN.replace(weekToGen)
    cd=cd.set_index(['REP','GEN','READ']).T.sort_index(1).sort_index()
    # Best effort: CHROM (text after its first three characters) is read as a
    # Roman numeral; any failure, including a missing roman module, is ignored.
    try:
        import roman
        cd=cd.reset_index()
        cd.CHROM=cd.CHROM.apply(lambda x:roman.fromRoman(x[3:]))
        cd=cd.set_index(['CHROM','POS'])
    except: pass
    return
def replace_cardinal(token):
    """5. Ordinary (cardinal) numbers: spell out a cardinal token.

    'U.S.', spaces, double quotes, commas, full stops and colons are removed
    first; an empty remainder yields "".  A leading '-' becomes the prefix
    "minus ", a trailing "'s" is re-attached to the result, and a trailing
    'A' or 'M' is dropped.  Digits and Roman numerals are spelled out with
    numstr2word(); anything else is returned as cleaned.
    """
    for unwanted in ('U.S.', " ", '"', ',', '.', ':'):
        token = token.replace(unwanted, "")
    if not token:
        return ""

    minus = token.startswith("-")
    if minus:
        token = token[1:]
    token = token.replace('-', "")

    ends = ""
    if token.endswith("'s"):
        ends = "'s"
        token = token[:-2]
    elif token.endswith(("A", "M")):
        token = token[:-1].strip()

    if token.isdigit():
        res = numstr2word(token)
    else:
        # May be a Roman numeral.
        try:
            res = numstr2word(roman.fromRoman(token))
        except roman.InvalidRomanNumeralError:
            res = token

    prefix = "minus " if minus else ""
    return prefix + res + ends
def numeric(x):
    """Map a page-style label to a number.

    A label starting with a digit is parsed with int() and shifted by
    Util.INTEGER_OFFSET; otherwise it is read as a Roman numeral, and 0 is
    returned when roman raises a RomanError.
    """
    if x[0].isdigit():
        return int(x) + Util.INTEGER_OFFSET
    try:
        value = roman.fromRoman(x)
    except roman.RomanError:
        value = 0
    return value
def testSanity(self):
    """Round trip: fromRoman(toRoman(n)) == n for every n in 1..4999."""
    for expected in range(1, 5000):
        round_tripped = roman.fromRoman(roman.toRoman(expected))
        self.assertEqual(expected, round_tripped)
def _ordinal2word(wordsList, indice):
    """Convert an English ordinal number to a written word.

    i.e. 1st --> first

    param wordsList: the list of words being processed
    param indice: position in wordsList of the utf-8 ordinal number
    return a 'written' ordinal number; the normalized number itself (after
           printing a notice) when it is neither digits nor a Roman number
    """
    strNumber = NumberFormula._normalizeNumber(wordsList[indice])

    if strNumber.encode('utf-8') == u"1st".encode('utf-8'):
        return u"first"

    # Drop the ordinal suffix letters (st, nd, rd, th).
    bareNumber = re.sub(u"[ndstrh]", "", strNumber)

    if bareNumber.isdigit():
        return num2words(int(bareNumber), ordinal=True)

    if NumberFormula._isRomanNumber(bareNumber):
        # Roman to cardinal, then digits to ordinal
        cardinalNumber = fromRoman(bareNumber.encode('utf-8'))
        return num2words(cardinalNumber, ordinal=True)

    print("newnumberis not digit!!!")
    return strNumber
def get_unit(title):
    """Split a title into a (unit, number) pair using the title patterns.

    The lower-cased title is matched against each pattern returned by
    patterns.makeTitlePatterns().  Without any match the unit is the title
    with spaces, brackets and parentheses replaced by "_" and the number is
    "1".  A unit of "art" is reported as "article".

    NOTE(review): Python 2 code (print statement, dict.iteritems).
    NOTE(review): the one-line source lost its indentation; the placement of
    the final "art" check after the loop is the most plausible reading.
    """
    pattern = patterns.makeTitlePatterns()
    title = title.lower()
    # this first value is overwritten by the next statement
    unit = title.replace(" ", "_")
    unit = re.sub("[ \[\]\(\)]", "_", title)
    number = "1"
    for unitForm, unitPattern in pattern.iteritems():
        m = unitPattern.match(title)
        if m:
            if unitForm == "title_preamble":
                unit = m.group(1)
                number = "1"
            elif unitForm == "title_number":
                unit = "unspecified"
                number = m.group(1)
            elif unitForm == "title_unit_roman":
                unit = m.group(1)
                try:
                    number = str(roman.fromRoman(m.group(2).upper()))
                except roman.InvalidRomanNumeralError:
                    # keep the raw text when it is not a valid Roman numeral
                    print "Invalid Roman title number:", title, "unit:", unit, "number:", number
                    number = m.group(2)
            else:
                unit = m.group(1)
                number = m.group(2)
    if unit == "art":
        unit = "article"
    return unit, number
def tb_heading_features(y):
    """Return the heading feature code of a token.

    "0" for a table/figure keyword (Table, TABLE, Figure, FIGURE, Fig.,
    FIG.); "1" for a dotted section number (3, 3.1, 3.1.4, optional trailing
    dot) or a token whose part before the first dot is a Roman numeral;
    "2" for any other token starting with an upper-case character; "3"
    otherwise.

    :param y: the token; surrounding whitespace is ignored
    :return: the feature code as a one-character string
    """
    keywords = {"Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."}
    x = y.strip()
    if x in keywords:
        return "0"

    numbering = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = numbering.match(x)
    # The numbering pattern has to cover the entire token.
    if m and m.span() == (0, len(x)):
        return "1"

    leading_part = x.split('.', 2)[0]
    try:
        roman.fromRoman(leading_part.upper())
        return "1"
    except Exception:
        # Narrowed from a bare ``except``: KeyboardInterrupt and SystemExit
        # now propagate instead of being treated as "not a numeral".
        if x[0].isupper():
            return "2"
        return "3"
def maybe_normalize(self, value, mapping='', roman_normalization=True, mapping_append=True):
    """Apply replacement rules, then optional Roman-numeral conversion, to ``value``.

    ``mapping`` is combined with ``self.default_mapping``: placed before it
    when ``mapping_append`` is true, after it otherwise; an empty mapping
    means the defaults alone.  Each rule is a pair whose first item is either
    a plain string or a compiled pattern.  When ``roman_normalization`` is
    set, every numeral reported by ``self.get_roman_numbers`` is replaced by
    its decimal value within its reported context.
    """
    if mapping == '':
        rules = self.default_mapping
    elif mapping_append:
        rules = mapping + self.default_mapping
    else:
        rules = self.default_mapping + mapping

    for norm in rules:
        matcher = norm[0]
        if type(matcher) == str:
            value = value.replace(matcher, norm[1])
        elif isinstance(matcher, Pattern):
            value = matcher.sub(norm[1], value)
        else:
            print('UNEXPECTED', type(matcher), matcher)

    if roman_normalization:
        for ro_before, ro_after, ro in self.get_roman_numbers(value):
            try:
                value = value.replace(
                    ro_before + ro + ro_after,
                    ro_before + str(roman.fromRoman(ro)) + ro_after)
            except roman.InvalidRomanNumeralError as ex:
                # Not a real numeral: report it and keep the text as is.
                print(ex)

    value = self.clean_single_line(value)
    # NOTE(review): as visible here both arguments are a single space; the
    # first may originally have been a double or non-breaking space -- confirm.
    return value.replace(' ', " ")
def token_features(y):
    """Classify a token for section-heading detection.

    Codes, checked in this order:
      "5"  the special marker "$$$"
      "0"  a table/figure keyword (Table, TABLE, Figure, FIGURE, Fig.)
      "1"  a heading label: a number 1..20, a dotted number such as 3.2 or
           3.2. with each part at most 20, a Roman numeral up to 20, or a
           single upper-case letter written as A, (A) or A) -- the last two
           kinds optionally followed by one dot
      "2"  any other token starting with an upper-case character
      "3"  a token whose first dot-separated part is neither purely
           alphabetic nor purely digits
      "4"  everything else

    :param y: the token; surrounding whitespace is ignored
    :return: the feature code as a one-character string
    """
    x = y.strip()
    parts = x.split('.')
    if x == "$$$":
        return "5"
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"

    p_len = len(parts)
    head = parts[0]
    # True for "X" and "X." -- a label with at most one trailing dot.
    bare_or_trailing_dot = p_len == 1 or (p_len == 2 and parts[1] == '')

    # Plain and dotted decimal numbering.
    if p_len == 1 and x.isdigit() and 1 <= int(x) <= 20:
        return "1"
    if p_len in (2, 3) and head.isdigit() and 1 <= int(head) <= 20:
        second_is_small_number = parts[1].isdigit() and int(parts[1]) <= 20
        if parts[1] == '' or second_is_small_number:
            if p_len == 2:
                return "1"
            if second_is_small_number and parts[2] == '':
                return "1"

    # Roman numbering.
    if bare_or_trailing_dot:
        try:
            if roman.fromRoman(head.upper()) <= 20:
                return "1"
        except Exception:
            # Not a Roman numeral.  (Narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.)
            pass

    # Single upper-case letter labels: A, (A), A)
    is_letter_label = (
        (len(head) == 1 and 'A' <= head <= 'Z')
        or (len(head) == 3 and head[0] == '(' and head[2] == ')'
            and head[1].isalpha() and head[1].isupper())
        or (len(head) == 2 and head[1] == ')'
            and head[0].isalpha() and head[0].isupper())
    )
    if is_letter_label and bare_or_trailing_dot:
        return "1"

    if x[0].isupper():
        return "2"
    if not (head.isalpha() or head.isdigit()):
        return "3"
    return "4"
def get_spenser(self):
    '''
    Read data/spenser.txt and return a dictionary of sonnets.

    Each element of the dict is one of the sonnets, keyed by sonnet number.
    A line that parses as a Roman numeral starts a new sonnet; every other
    non-empty line is added to the current sonnet as a list of lower-cased
    words with the characters :;.()?! removed.
    '''
    filename = 'data/spenser.txt'
    num = 0
    sonnets = {}

    with open(filename) as f:
        for raw_line in f:
            line = re.sub('[:;.()?!]', '', raw_line.strip()).lower()
            try:
                num = int(roman.fromRoman(line.upper()))
            except (ValueError, roman.InvalidRomanNumeralError):
                pass
            else:
                # A heading line: open a fresh sonnet and move on.
                sonnets[num] = []
                continue

            words = line.split()
            if len(words) > 0:
                sonnets[num].append(words)

    return sonnets
def getSortedList_easy(lst):
    """
    Sort royal names using the pre-made module Roman
    (for speed comparison / to check that the conversion is correct).

    :param lst: List of Stringed RoyalNames ("Name ORDINAL")
    :return: List of Sorted RoyalNames; None when some element is not a str
    """
    if not all(isinstance(entry, str) for entry in lst):
        return None

    # Split each name from its Roman ordinal and convert the ordinal.
    roman_to_int_convertedlst = []
    for royalname in lst:
        name, ordinal = royalname.split()
        roman_to_int_convertedlst.append((name, roman.fromRoman(ordinal)))
    print("Converted from", lst, "\n" "to", roman_to_int_convertedlst)

    sortedlst = sorted(roman_to_int_convertedlst)
    print("Sorted", sortedlst)

    return [name + " " + str(roman.toRoman(integer))
            for name, integer in sortedlst]
def replace_roman_numerals_in_match(m):
    """Rewrite the Roman numeral captured in group 2 of ``m`` as "<g1><n>:<g7>".

    :param m: a regex match object; group 1 is the text kept before the
        numeral, group 2 the numeral and group 7 the part placed after the
        colon
    :return: the rewritten text, or the whole original match when group 2 is
        empty or cannot be converted
    """
    numeral = m.group(2).upper()
    if not numeral:
        # Previously this path fell off the end and returned None; used as a
        # substitution callback that makes re.sub drop the matched text.
        return m.group(0)
    try:
        return "%s%s:%s" % (m.group(1), roman.fromRoman(numeral), m.group(7))
    except Exception:
        # Best effort: keep the original text.  (Narrowed from a bare
        # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.)
        return m.group(0)
def ps_event(numeral):
    """Render the event page for the event numbered by a Roman numeral.

    The numeral is upper-cased before conversion.  Returns the pair
    ("Invalid roman numeral!", 400) when it cannot be parsed, otherwise the
    rendered 'event.html' template.
    """
    try:
        number = roman.fromRoman(numeral.upper())
    except roman.InvalidRomanNumeralError:
        return "Invalid roman numeral!", 400

    return flask.render_template(
        'event.html',
        event=ps_data.get_ps_event_by_number(number),
    )
def parse_token(token):
    """Normalize one token of an arithmetic expression.

    Tokens that are a substring of '()+-*/' are returned as they are, tokens
    made only of digits and commas lose their commas, and tokens made only of
    the letters IVXLCDM are converted from Roman to decimal text.  For
    anything else a warning is logged and a replacement is read from the
    user with raw_input().
    """
    if token in '()+-*/':
        return token
    if not token.strip('0123456789,'):
        return token.replace(',', '')
    if not token.strip('IVXLCDM'):
        return str(roman.fromRoman(token))

    log.warning('Unknown token: ' + token)
    return raw_input()
def _extract_charge(arg: str): """ Receive a `str` representing an element, isotope, or ion. Return a `tuple` containing a `str` that should represent an element or isotope, and either an `int` representing the charge or `None` if no charge information is provided. Raise an `~plasmapy.utils.InvalidParticleError` if charge information is inputted incorrectly. """ invalid_charge_errmsg = (f"Invalid charge information in the particle string '{arg}'.") if arg.count(' ') == 1: # Cases like 'H 1-' and 'Fe-56 1+' isotope_info, charge_info = arg.split(' ') sign_indicator_only_on_one_end = ( charge_info.endswith(('-', '+')) ^ charge_info.startswith(('-', '+'))) just_one_sign_indicator = ( (charge_info.count('-') == 1 and charge_info.count('+') == 0) or (charge_info.count('-') == 0 and charge_info.count('+') == 1)) if not sign_indicator_only_on_one_end and just_one_sign_indicator: raise InvalidParticleError(invalid_charge_errmsg) from None charge_str = charge_info.strip('+-') try: if roman.romanNumeralPattern.match(charge_info): Z_from_arg = roman.fromRoman(charge_info) - 1 elif '-' in charge_info: Z_from_arg = - int(charge_str) elif '+' in charge_info: Z_from_arg = int(charge_str) else: raise InvalidParticleError(invalid_charge_errmsg) from None except ValueError: raise InvalidParticleError(invalid_charge_errmsg) from None elif arg.endswith(('-', '+')): # Cases like 'H-' and 'Pb-209+++' char = arg[-1] match = re.match(f"[{char}]*", arg[::-1]) Z_from_arg = match.span()[1] isotope_info = arg[0:len(arg) - match.span()[1]] if char == '-': Z_from_arg = -Z_from_arg if isotope_info.endswith(('-', '+')): raise InvalidParticleError(invalid_charge_errmsg) from None else: isotope_info = arg Z_from_arg = None return isotope_info, Z_from_arg
def _roman2word(strNumber):
    """Convert a roman number to a written word.

    param strNumber: an utf-8 roman number
    return a 'written' roman number, produced by
           NumberFormula._cardinal2word from the decimal value
    """
    encodedNumber = strNumber.encode('utf-8')
    return NumberFormula._cardinal2word(fromRoman(encodedNumber))
def markup(self):
    """Return one dict per text fragment of the document, with act/scene numbers.

    descend() is run over ``self.dom``; every key of its result is turned
    into a dict and the mapped value stored under "textString".  Fragments
    without any word character are skipped.  When the fragment carries an
    "ACT" / "SCENE" entry, the Roman numeral found in it is stored as
    "actNumber" / "sceneNumber".

    :return: list of the annotated dicts
    """
    # Named ``fragments`` rather than ``all`` so the builtin is not shadowed.
    fragments = descend(self.dom, stackOfTags=[], seenSoFar=dict(), depth=1)
    returnable = []
    for key in fragments:
        line = dict(key)
        line["textString"] = fragments[key]
        if not re.search(r"\w+", line["textString"]):
            continue

        try:
            line["actNumber"] = roman.fromRoman(
                re.findall(r"(?:ACT|Act) +(\w+)", line["ACT"])[0])
        except KeyError:
            # Fragment is not inside an act.
            pass

        try:
            line["sceneNumber"] = roman.fromRoman(
                re.findall(r"(?:SCENE|Scene) +([IXVL]+)", line["SCENE"])[0])
        except Exception:
            # Best effort: a missing or unparsable scene heading leaves the
            # fragment without a scene number.  (Narrowed from a bare
            # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.)
            pass

        returnable.append(line)
    return returnable
def _roman2word(strNumber):
    """Convert a roman number to a written word.

    param strNumber: an utf-8 roman number
    return a 'written' roman number, as produced by num2words from the
           decimal value
    """
    decimalValue = fromRoman(strNumber.encode('utf-8'))
    return num2words(decimalValue)
def _to_int(self, value): if isinstance(value, int): return value elif isinstance(value, basestring) and self.is_roman(value.upper()): return roman.fromRoman(value.upper()) elif isinstance(value, RomanNumeral): return value._val else: raise ValueError("Value must be a valid roman numeral, a string" " representing one or an integer: '{0}'" .format(value))
def parse_worked_on(self, text_or_node, submodule):
    """Collect the "worked on" strings of a submodule and pass them on.

    ``submodule`` is a Roman numeral that is converted to a zero-based index.
    A plain string is used as the single entry; otherwise list items are
    gathered once per list/enumeration found by traversing the node.  The
    result goes to ``self._parse_worked_strings``.
    """
    submodule = roman.fromRoman(submodule) - 1

    if isinstance(text_or_node, basestring):
        worked_strings = [text_or_node]
    else:
        worked_strings = []
        # NOTE(review): the items are parsed from ``text_or_node`` on every
        # pass rather than from the enumeration being visited -- confirm
        # that this is intended.
        for _enumeration in text_or_node.traverse(utils.is_list_or_enumeration):
            worked_strings += utils.parse_list_items(text_or_node)

    self._parse_worked_strings(worked_strings, submodule)
def _compute_sortkey(self):
    """Build the sort key of a volume from its type and name.

    The pattern registered for ``self.type`` is searched in ``self.name``.
    Group 1 is read as a Roman numeral (0 is used for the name "R").  First
    and second series give "1_NN_<group 2>" / "2_NN_<group 2>", supplements
    give "3_NN", and every other type gives "4".
    """
    match = _REGEX_MAPPING[self.type].search(self.name)

    latin_number = 0
    if self.name != "R":
        latin_number = roman.fromRoman(match.group(1))

    if self.type == VolumeType.FIRST_SERIES:
        return f"1_{latin_number:02d}_{match.group(2)}"
    if self.type == VolumeType.SECOND_SERIES:
        return f"2_{latin_number:02d}_{match.group(2)}"
    if self.type == VolumeType.SUPPLEMENTS:
        return f"3_{latin_number:02d}"
    return "4"
def split_name_and_submodule(name):
    """Split a module name into [name] or [name, submodule index, ...].

    ``submodule_split_re`` separates the name from its submodule part.  When
    that part is empty only the stripped name is returned; otherwise the part
    is split with ``submodules_split_re`` and each piece is converted from a
    Roman numeral (case-insensitive) to a zero-based index.
    """
    groups = list(submodule_split_re.match(name).groups())
    base_name = groups[0].strip()
    submodule_text = groups[1]

    if len(submodule_text) == 0:
        return [base_name]

    indices = [roman.fromRoman(piece.upper()) - 1
               for piece in submodules_split_re.split(submodule_text)]
    return [base_name] + indices
def rewrite_roman_numerals(self, text):
    """Replace Roman numerals that follow a reference unit by decimal numbers.

    Occurrences of "<unit> <numeral>" are searched, where the unit is any
    entry of ``self.refUnits`` and the numeral matches ``self.romanPattern``.
    Each numeral found that way is then replaced wherever it stands as a
    whole word in the text.  Numerals that do not convert are left alone, and
    the text is returned unchanged when there are no reference units.
    """
    if self.refUnits == []:
        return text

    unit_alternatives = "|".join(self.refUnits)
    romanRef = re.compile(r"\b(" + unit_alternatives + ") " + self.romanPattern + r"\b")

    for _unit, romanNumeral in romanRef.findall(text):
        try:
            number = str(roman.fromRoman(romanNumeral.upper()))
        except roman.InvalidRomanNumeralError:
            continue
        text = re.sub(r"\b" + romanNumeral + r"\b", number, text)

    return text
def traiter_nombres(chaine):
    """Convert the Roman numerals of a number string to decimal digits.

    Spaces are removed first.  The string is scanned for characters listed in
    the global ``seps``; the piece in front of each separator is upper-cased
    and, when it is a Roman numeral, replaced by its decimal value.  Without
    any separator the whole string is converted.  The result is finally
    passed through supprimer_sep_nombre().

    :param chaine: the text to process
    :return: the processed text
    """
    global seps
    chaine = chaine.replace(' ', '')
    sep = 0  # position just after the last separator seen

    # 1. convert the Roman number to an integer
    # NOTE(review): ``i`` indexes the string as it was before the loop, while
    # ``chaine`` may change length inside it, so positions can drift after a
    # replacement, and the piece after the last separator is never converted
    # -- confirm whether that is intended.
    for i, c in enumerate(chaine):
        if c in seps:
            morceau = chaine[sep:i].upper()  # the piece to process
            try:
                chaine = chaine[:sep] + str(fromRoman(morceau)) + chaine[i:]
            except Exception:
                # Not a Roman numeral: keep the piece.  (Narrowed from a bare
                # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.)
                pass
            sep = i + 1

    if sep == 0:
        try:
            chaine = str(fromRoman(chaine))
        except Exception:
            pass

    chaine = supprimer_sep_nombre(chaine)
    return chaine
def parse_dependencies(self, text_or_node, submodule):
    """Record the dependency strings of a submodule in ``self.dependencies``.

    ``submodule`` is a Roman numeral that is converted to a zero-based index
    used as the key.  A plain string is used as the single entry; otherwise
    list items are gathered once per list/enumeration found by traversing the
    node.  New strings are appended to those already stored for the key.
    """
    submodule = roman.fromRoman(submodule) - 1

    if isinstance(text_or_node, basestring):
        dep_strings = [text_or_node]
    else:
        dep_strings = []
        # NOTE(review): the items are parsed from ``text_or_node`` on every
        # pass rather than from the enumeration being visited -- confirm
        # that this is intended.
        for _enumeration in text_or_node.traverse(utils.is_list_or_enumeration):
            dep_strings += utils.parse_list_items(text_or_node)

    if submodule not in self.dependencies:
        self.dependencies[submodule] = []
    self.dependencies[submodule] += dep_strings
def token_features(tok_input):
    """Classify a token for section-heading detection.

    Codes, checked in this order:
      "6"  a special section title (Abstract, Acknowledgement(s),
           Reference(s)) in capitalized or upper-case spelling
      "5"  the special marker "$$$"
      "0"  a table/figure keyword (Table, TABLE, Figure, FIGURE, Fig.)
      "1"  a heading label: a number 1..20, a dotted number such as 3.2 or
           3.2. with each part at most 20, a Roman numeral up to 20, or a
           single upper-case letter written as A, (A) or A) -- the last two
           kinds optionally followed by one dot
      "2"  any other token starting with an upper-case character
      "3"  a token whose first dot-separated part is neither purely
           alphabetic nor purely digits
      "4"  everything else

    :param tok_input: the token; surrounding whitespace is ignored
    :return: the feature code as a one-character string
    """
    special_sections = (
        "Abstract", "ABSTRACT",
        "Acknowledgement", "ACKNOWLEDGEMENT",
        "References", "Reference", "REFERENCE", "REFERENCES",
        "Acknowledgements",
        # The original list only had the mixed-case "ACKNOWLEDGEMENTs"; it is
        # kept for compatibility and the all-caps spelling is added.
        "ACKNOWLEDGEMENTs", "ACKNOWLEDGEMENTS",
    )

    tok = tok_input.strip()
    parts = tok.split('.')

    # Mark special sections
    if tok in special_sections:
        return "6"
    # Mark special symbol '$$$'
    if tok == "$$$":
        return "5"
    # Mark table/figure headings
    if tok in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"

    p_len = len(parts)
    first = parts[0]
    # True for "X" and "X." -- a label with at most one trailing dot.
    label_shape = p_len == 1 or (p_len == 2 and parts[1] == '')

    # Digit tokens
    if p_len == 1 and tok.isdigit() and 1 <= int(tok) <= 20:
        return "1"
    if p_len in (2, 3) and first.isdigit() and 1 <= int(first) <= 20:
        second_small = parts[1].isdigit() and int(parts[1]) <= 20
        if parts[1] == '' or second_small:
            if p_len == 2:
                return "1"
            if second_small and parts[2] == '':
                return "1"

    # Handling Roman numerals
    if label_shape:
        try:
            if roman.fromRoman(first.upper()) <= 20:
                return "1"
        except Exception:
            # Not a Roman numeral.  (Narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.)
            pass

    # Single upper-case letter labels: A, (A), A)
    letter_label = (
        (len(first) == 1 and 'A' <= first <= 'Z')
        or (len(first) == 3 and first[0] == '(' and first[2] == ')'
            and first[1].isalpha() and first[1].isupper())
        or (len(first) == 2 and first[1] == ')'
            and first[0].isalpha() and first[0].isupper())
    )
    if letter_label and label_shape:
        return "1"

    # Handling capitalizations
    if tok[0].isupper():
        return "2"
    if not (first.isalpha() or first.isdigit()):
        return "3"
    return "4"
def solve(ques):
    """Evaluate the left-hand side of an arithmetic question.

    Operands may be decimal numbers (commas are ignored), upper-case Roman
    numerals, or numbers written out in lower-case English words.

    :param ques: the question text; everything from the first '=' on is ignored
    :return: the value of the expression followed by "\\r\\n"
    """
    # We only need lhs
    ques = ques.split("=")[0]
    # We don't need commas (was ``ques.repace``, which raised AttributeError)
    ques = ques.replace(',', '')

    # Let's separate parts between operators; the capturing group keeps the
    # operators and parentheses in the result.
    groups = re.split(r'([\-\+\*\/\(\)])', ques)

    # Convert each part to a number
    converted = []
    for g in groups:
        g = g.strip()
        if re.match('([A-Z]+)', g):
            # If part is a roman numeral, convert it to decimal
            converted.append("%s" % roman.fromRoman(g))
        elif re.match('([a-z]+)', g):
            # If part is in plain english, convert it to decimal
            converted.append("%s" % text2int(g))
        else:
            converted.append(g)
    parsed = ''.join(converted)

    # Now eval can do the rest
    # NOTE(review): eval() runs whatever text survives the conversion above;
    # do not feed this function untrusted input without restricting it first.
    return "%s\r\n" % (eval(parsed))
def decide_REIA_or_REWL(re_sub):
    """Choose the REIA or REWL template for a matched link.

    The matched text is searched with FIT.  Archive links always get REIA.
    Otherwise the volume (group 3, a Roman numeral) decides: REIA is used for
    plain volumes I to XI, for the half volumes I A,1 and II A,1, and for the
    supplements S I to S III; every other volume gets REWL.  The template
    arguments come from build_arg().
    """
    searcher = FIT.search(re_sub.group(0))
    arg1, arg2, _arg3 = build_arg(searcher)
    reia_template = '{{REIA|%s|%s}}'
    rewl_template = '{{REWL|%s|%s}}'

    if searcher.group(1) == 'archive':
        # archive
        return reia_template % (arg1, arg2)

    # wikilivre
    arabic = roman.fromRoman(searcher.group(3))
    subl = searcher.group(2)
    app = searcher.group(4)
    halfband = searcher.group(5)

    # all between I and XI
    plain_low_volume = (app is None) and (subl is None) and (arabic < 12)
    # I A,1 and II A,1
    first_a_half = (subl is None) and (app == 'A') and (arabic < 3) and (halfband == '1')
    # S I, S II, S III
    low_supplement = (subl == 'S') and (app is None) and (arabic < 4) and (halfband is None)

    if plain_low_volume or first_a_half or low_supplement:
        return reia_template % (arg1, arg2)
    # rest
    return rewl_template % (arg1, arg2)