Beispiel #1
0
def digit2word(key):
    """Spell out a numeric token (Arabic or Roman) as lowercase English words.

    The token is first stripped of `` U.S.``, commas and periods.  A token
    made only of zeros (including the empty string) yields ``'zero'`` and
    ``'-0'`` yields ``'minus zero'``.  A trailing ``'s`` or ``s`` is split
    off, the remainder is converted recursively and ``'s`` is appended.
    Otherwise the token (or, failing that, its first whitespace-separated
    word) is tried as a Roman numeral, accepting ``IIII`` as ``IV``, and the
    result is handed to ``p.number_to_words``.

    Returns the spelled-out text, or the cleaned token unchanged when the
    conversion to words fails.
    """
    key = key.replace(' U.S.', '').replace(',', '').replace('.', '').strip()
    if key.replace('0', '') == '':
        return 'zero'
    if key == '-0':
        return 'minus zero'
    # "except Exception" instead of a bare "except": the conversions stay
    # best-effort, but KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        if key.endswith("'s"):
            return digit2word(key[:-2]) + "'s"
        if key.endswith("s"):
            return digit2word(key[:-1]) + "'s"
        key = str(roman.fromRoman(key.replace('IIII', 'IV')))
    except Exception:
        # The whole token is not a Roman numeral; try its first word next.
        pass
    try:
        key = str(roman.fromRoman(key.split()[0].replace('IIII', 'IV')))
    except Exception:
        pass
    try:
        text = p.number_to_words(key, decimal='point', andword='', zero='o')
        # NOTE(review): every '.' was removed from key at the top of the
        # function, so the two decimal-point adjustments below cannot match
        # as written.  Kept in case the stripping is relaxed later.
        if re.match(r'^0\.', key):
            text = 'zero ' + text[2:]
        if re.match(r'.*\.0$', key):
            text = text[:-2] + ' zero'
        text = text.replace('-', ' ').replace(',', '')
        return text.lower()
    except Exception:
        return key
Beispiel #2
0
def testFromRomanCase():
    """Uppercase numerals must parse; their lowercase forms must be rejected."""
    for value in range(1, 5000):
        encoded = roman.toRoman(value)
        # Must not raise: uppercase is the accepted spelling.
        roman.fromRoman(encoded.upper())
        lowered = encoded.lower()
        with pytest.raises(roman.InvalidRomanNumeralError):
            roman.fromRoman(lowered)
Beispiel #3
0
def roman2dec() -> int:
	"""
	Entry point for `se roman2dec`

	Converts every numeral read from piped standard input (one per line)
	and then every numeral given on the command line, printing each decimal
	value. Returns 0 on success, or `se.InvalidInputException.code` as soon
	as one input is not a Roman numeral.
	"""

	import roman

	parser = argparse.ArgumentParser(description="Convert a Roman numeral to a decimal number.")
	parser.add_argument("-n", "--no-newline", dest="newline", action="store_false", help="don’t end output with a newline")
	parser.add_argument("numbers", metavar="NUMERAL", nargs="+", help="a Roman numeral")
	args = parser.parse_args()

	# Piped input comes first, command-line numerals follow
	numerals = []
	if not sys.stdin.isatty():
		numerals.extend(piped.rstrip("\n") for piped in sys.stdin)
	numerals.extend(args.numbers)

	terminator = "\n" if args.newline else ""

	for numeral in numerals:
		try:
			value = roman.fromRoman(numeral.upper())
		except roman.InvalidRomanNumeralError:
			se.print_error("Not a Roman numeral: {}".format(numeral))
			return se.InvalidInputException.code
		print(value, end=terminator)

	return 0
 def testFromRomanCase(self):
     """Lowercase numerals are rejected while the uppercase form is accepted."""
     for value in range(1, 4000):
         encoded = roman.toRoman(value)
         # Implicit check: the uppercase spelling must not raise.
         roman.fromRoman(encoded.upper())
         with self.assertRaises(roman.InvalidRomanNumeralError):
             roman.fromRoman(encoded.lower())
Beispiel #5
0
def infer_production_century(br, de):
    """
    Estimate the century in which an author was productive.

    With year inputs, the century following the birth year is returned,
    pushed one further when the author was born in year 80 or later of a
    century (the death year is then asserted to fall in that century).
    With Roman-numeral century inputs, equal centuries are returned as-is
    and consecutive ones give a .5 value (e.g. XVI / XVII => 16.5).
    Returns False when either value is '0' or not a valid Roman numeral.
    """
    if str(br) == '0' or str(de) == '0':
        return False

    try:
        birth_year, death_year = int(br), int(de)
    except ValueError:
        # Not plain years: interpret both values as Roman-numeral centuries.
        try:
            birth_century = roman.fromRoman(br)
            death_century = roman.fromRoman(de)
        except roman.InvalidRomanNumeralError:
            return False
        if birth_century == death_century:
            return birth_century
        assert death_century == birth_century + 1
        return birth_century + 0.5

    century_index = floor(birth_year / 100)
    if birth_year % 100 < 80:
        return century_index + 1

    # Born late in the century: production belongs to the following one.
    pcen = century_index + 2
    assert floor(death_year / 100) + 1 == pcen
    return pcen
Beispiel #6
0
 def testFromRomanCase(self):
     """fromRoman must parse uppercase numerals and reject lowercase ones."""
     for value in range(1, 5000):
         encoded = roman.toRoman(value)
         upper_form = encoded.upper()
         lower_form = encoded.lower()
         # The uppercase call doubles as a "does not raise" check.
         roman.fromRoman(upper_form)
         self.assertRaises(roman.InvalidRomanNumeralError,
                           roman.fromRoman, lower_form)
 def is_roman_number(roman_as_str):
     """Return True when fromRoman accepts the value, False when it raises InvalidRomanNumeralError."""
     try:
         fromRoman(roman_as_str)
     except InvalidRomanNumeralError:
         is_valid = False
     else:
         is_valid = True
     return is_valid
Beispiel #8
0
    def roman_magic(self, v1, op, v2):
        """Combine two Roman numerals with the operator text ``op``.

        Both numerals are converted to integers, the expression
        ``<v1><op><v2>`` is evaluated and the result is converted back to a
        Roman numeral.
        """
        left_value = fromRoman(v1)
        right_value = fromRoman(v2)

        # SECURITY NOTE(review): the expression goes through eval(), so
        # ``op`` must never come from untrusted input.
        expression = str(left_value) + op + str(right_value)

        return toRoman(eval(expression))
Beispiel #9
0
	def testFromRomanCase(self):
		"""fromRoman must take uppercase numerals and refuse lowercase ones"""
		for value in range(1, 5000):
			encoded = roman.toRoman(value)
			# Nothing is assumed about the case toRoman() produces, so the
			# numeral is upper-cased explicitly; this call is also the
			# implicit check that uppercase input does NOT raise.
			roman.fromRoman(encoded.upper())
			with self.assertRaises(roman.InvalidRomanNumeralError):
				roman.fromRoman(encoded.lower())
Beispiel #10
0
def is_roman(goo):
    """Return True if ``roman.fromRoman`` accepts ``goo``, False if it raises InvalidRomanNumeralError."""
    try:
        roman.fromRoman(goo)
    except roman.InvalidRomanNumeralError:
        return False
    return True
Beispiel #11
0
 def isRoman(txt):
     # Helper: reports whether txt parses as a Roman numeral.
     # Relies on the pip module roman: https://pypi.python.org/pypi/roman
     try:
         roman.fromRoman(txt)
     except roman.InvalidRomanNumeralError:
         valid = False
     else:
         valid = True
     return valid
Beispiel #12
0
def replace_chapter_number(match):
    """Build a ``Chapitre N.`` heading from a regex match.

    Group 1 of ``match`` is converted from a Roman numeral to Arabic
    digits.  If that fails and the text starts with ``L``, the remainder
    after the ``L`` is tried instead.  When neither attempt succeeds the
    captured text is used unchanged.

    Returns the heading string ``'Chapitre <number>.'``.
    """
    number_text = match.group(1)
    try:
        number_text = str(roman.fromRoman(number_text))
    except roman.InvalidRomanNumeralError:
        if number_text.startswith('L'):
            try:
                number_text = str(roman.fromRoman(number_text[1:]))
            except roman.InvalidRomanNumeralError:
                # The fallback is best-effort as well: keep the captured
                # text instead of letting the error escape.
                pass
    return f'Chapitre {number_text}.'
Beispiel #13
0
 def start_end(self):
     """Return the (start, end) numbers parsed from ``self.name``.

     The ``start`` and ``end`` groups of PAGE_ORDER_RX are read as decimal
     integers when ``start`` is all digits, otherwise as Roman numerals.
     Returns None when the name does not match PAGE_ORDER_RX.
     """
     match = PAGE_ORDER_RX.match(self.name)
     if match is None:
         return None
     start, end = match.group("start"), match.group("end")
     if start.isdigit():
         return int(start), int(end)
     return roman.fromRoman(start), roman.fromRoman(end)
def is_roman(text: str) -> bool:
    """Tell whether the given string is a Roman number.

    The text is stripped, upper-cased and reduced to its A-Z letters before
    it is handed to ``fromRoman``; True is returned unless ``NoRoman`` is
    raised.
    """
    letters_only = re.sub(r'[^A-Z]', "", text.strip().upper())
    try:
        fromRoman(letters_only)
        return True
    except NoRoman:
        return False
Beispiel #15
0
def getMinimalFormOfNumeral(numeral):
    """Re-encode a Roman numeral via its integer value.

    Each symbol is converted on its own: a symbol smaller than its right
    neighbour is subtracted, any other symbol is added, and the last symbol
    is always added.  The total is converted back with ``roman.toRoman``.
    """
    total = 0

    for current, following in zip(numeral, numeral[1:]):
        current_value = roman.fromRoman(current)
        if current_value < roman.fromRoman(following):
            total -= current_value
        else:
            total += current_value

    total += roman.fromRoman(numeral[-1])
    return roman.toRoman(total)
Beispiel #16
0
def convertToNum(input_type, s):
    """Convert the text ``s`` to a number according to ``input_type``.

    "R" parses an uppercase Roman numeral, "r" a Roman numeral after
    upper-casing, "a"/"A" goes through ``fromCharString`` on the lower-cased
    text, "i"/"f" parses a float; any other type yields 0.
    """
    if input_type == "R":
        return roman.fromRoman(s)
    if input_type == "r":
        return roman.fromRoman(s.upper())
    if input_type in ("a", "A"):
        return fromCharString(s.lower())
    if input_type in ("i", "f"):
        return float(s)
    return 0
Beispiel #17
0
 def replace_roman_numerals_in_match(m):
     """Rewrite the Roman numeral captured in group 3 of ``m`` as Arabic digits.

     Returns ``<group 1><number>:<group 8>`` when group 8 is non-empty,
     otherwise ``<group 1><number><group 7>``.  The whole matched text is
     returned unchanged when group 3 is empty or the conversion fails.
     """
     s = m.group(3)
     s = s.upper()
     if not s:
         return m.group(0)
     # "except Exception" instead of a bare "except": the rewrite stays
     # best-effort without swallowing KeyboardInterrupt / SystemExit.
     try:
         number = roman.fromRoman(s)
         if m.group(8):
             return u"{}{}:{}".format(m.group(1), number, m.group(8))
         return u"{}{}{}".format(m.group(1), number, m.group(7))
     except Exception:
         return m.group(0)
def romanDigitRangeCorrector(cite_in):
    """Convert a Roman numeral, or a dash-separated pair of them, to Arabic digits.

    A citation containing a hyphen, en dash or em dash is split on the dash
    and its first two parts are converted, giving ``'<first>-<second>'``;
    otherwise the whole citation is converted.  Input is upper-cased first.
    """
    dash_expression = re.compile('[-–—]')
    if not dash_expression.search(cite_in):
        return str(roman.fromRoman(cite_in.upper()))

    pieces = dash_expression.split(cite_in)
    range_start = str(roman.fromRoman(pieces[0].upper()))
    range_end = str(roman.fromRoman(pieces[1].upper()))
    return range_start + '-' + range_end
    def normalize_string(self, input_string):
        """
        Normalize a Canon "Mark" model string into a compact lowercase key.

        1) split the input string into whitespace-separated tokens
        2) detect tokens that are a variation of "Mark" (mark, mk, MarkIII,
           5dmark, ...) and rewrite them to 'Mark' / '5D Mark'
        3) convert a Roman model number attached to such a token, and any
           token matched by roman.romanNumeralPattern, to Arabic digits
        4) join the tokens, lower-case the result and remove all spaces

        Raises roman.InvalidRomanNumeralError when an attached model number
        is neither all digits nor a valid Roman numeral.
        """
        tokens = input_string.split()
        normalized_tokens = []
        # Raw string: "\s" and "\d" are regex escapes, not string escapes.
        lookup_pattern = re.compile(r"(m(?:ar)?k\s*)([ivx\d]*)",
                                    flags=re.IGNORECASE)
        roman_nums = roman.romanNumeralPattern

        for token in tokens:

            # check if the token is a variation of mark
            lookup = lookup_pattern.search(token)
            if lookup:
                # some strings have the 5d concatenated to the model, separate it
                if re.search('5d', token, re.IGNORECASE):
                    token = '5D Mark'
                else:
                    token = 'Mark'
                model_nbr = lookup.group(2)

                # A Roman model number attached to the token is converted
                # and re-appended.
                # NOTE(review): an all-digit model number attached to the
                # token is not re-appended, so it is lost - confirm intent.
                if model_nbr != '' and not model_nbr.isdigit():
                    model_nbr = roman.fromRoman(model_nbr.upper())
                    token = token + ' ' + str(model_nbr)

            # a stand-alone Roman numeral token is converted as well
            if roman_nums.search(token):
                token = str(roman.fromRoman(token))

            normalized_tokens.append(token)

        # join the normalized tokens into a single compact lowercase string
        output_string = ' '.join(normalized_tokens)
        output_string = output_string.strip().lower().replace(" ", "")
        return output_string
Beispiel #20
0
def onLine(l):
    """Consume one line of the play text and update the module-level parser state.

    Depending on the line this either starts a new act, starts a new scene,
    starts a new character speech, or records a spoken line in PLAY_LINES.
    All state lives in the globals declared below; nothing is returned.
    """
    global PLAY
    global PLAY_LINES
    global line
    global inAct
    global inScene
    global inCharacter
    global currCharacter
    global firstBlock
    l = l.rstrip()
    # Act heading: the second whitespace-separated word is the act number
    # as a Roman numeral.
    if isSection(l, START_ACT):
        inAct = roman.fromRoman(l.split()[1])
        if DEBUG:
            print('Act', inAct)
        createIfEmpty(PLAY, inAct)
        inScene = False
        inCharacter = False
        return
    # Scene heading: the second word minus its last character is the scene
    # number (presumably a trailing punctuation mark is dropped - confirm).
    if isSection(l, START_SCENE):
        inScene = roman.fromRoman(l.split()[1][:-1])
        if DEBUG:
            print('Scene', inScene)
        createIfEmpty(PLAY[inAct], inScene)
        # The scene entry is reset to an empty list even if it existed.
        PLAY[inAct][inScene] = []
        inCharacter = False
        line = 0
        return
    # Character heading: must pass isSection, must not have a space at index
    # len(START_CHARACTER) + 1, and every word must start with an uppercase
    # character.
    if isSection(l, START_CHARACTER) and \
            l[len(START_CHARACTER) + 1] != ' ' and \
            len([word for word in l.split() if word[0].isupper()]) == len(l.split()):
        flushCharacter()
        inCharacter = l.strip().upper()
        currCharacter = ''
        firstBlock = True
        if DEBUG:
            print('Character', inCharacter)
        return
    # Spoken text: only the first block of lines after a character heading
    # is recorded.
    if inAct and inScene and inCharacter and firstBlock:
        # A blank line ends the first block once some text was collected;
        # blank lines before any text are skipped.
        if len(l.strip()) == 0 and firstBlock:
            if len(currCharacter) > 0:
                firstBlock = False
            return
        line += 1
        # Key: act number + scene number + character name + upper-cased
        # text, concatenated without separators; value: running line number.
        PLAY_LINES[''.join(
            [str(inAct),
             str(inScene), inCharacter,
             l.strip().upper()])] = line
        currCharacter += l + '\n'
Beispiel #21
0
    def _ordinal2word(wordsList, indice):
        """Spell out the French ordinal found at ``wordsList[indice]``.

           e.g. 1er --> premier

           param wordsList: list of words
           param indice: position of the ordinal number in wordsList
           return the ordinal written in words, or the normalized
                  number unchanged when it is neither cardinal nor Roman
        """
        strNumber = NumberFormula._normalizeNumber(wordsList[indice])
        if strNumber.encode('utf-8') == u"1ère".encode('utf-8'):
            return u"première"

        # Drop the ordinal suffix letters to keep the bare number
        stripped = re.sub(u"[erèm]", "", strNumber)
        if NumberFormula._isCardinalNumber(stripped):
            spelled = num2words(int(stripped), ordinal=True, lang='fr')
        elif NumberFormula._isRomanNumber(stripped):
            # Roman to cardinal, then digits to ordinal
            cardinalNumber = fromRoman(stripped.encode('utf-8'))
            spelled = num2words(cardinalNumber, ordinal=True, lang='fr')
        else:
            spelled = strNumber

        # Remove the plural "s" that precedes the ordinal ending
        for plural_form, fixed_form in ((r'vingtsi', 'vingti'),
                                        (r'centsi', 'centi'),
                                        (r'millionsi', 'millioni'),
                                        (r'milliardsi', 'milliardi')):
            spelled = re.sub(plural_form, fixed_form, spelled)

        return spelled
def get_orf_data(chromo, start, end, orf_annotation = None):
    """Return the gene features of a GFF file that overlap a region.

    chromo is the chromosome number as an integer, start/end bound the
    region, and orf_annotation is the path of the GFF file.  The process
    exits with a message when no GFF file is given.  The result has the
    columns chromo, start, end, strand and name.
    """
    if orf_annotation is None:
        sys.exit("For ORF annotation, a GFF file needs to be provided. Such as this one: https://github.com/jianlingzhong/COMPETE_examples/blob/master/saccharomyces_cerevisiae.20080621.gff")

    annotation = pd.read_csv(orf_annotation, sep = '\t', comment='#', header = None)

    # keep the gene features only
    is_gene = annotation.iloc[:, 2] == 'gene'
    genes = annotation.loc[is_gene]

    # drop Mito and 2-micron chromosome features
    seqid = genes.iloc[:, 0]
    genes = genes.loc[(seqid != 'chrMito') & (seqid != '2-micron')]

    # keep and name the columns that are needed
    genes = genes.iloc[:, [0, 3, 4, 6, 8]]
    genes = genes.rename(columns={0:'chromo', 3:'start', 4:'end', 6:'strand', 8:'name'})

    # 'chrI' -> 1
    genes['chromo'] = genes['chromo'].apply(lambda label: roman.fromRoman(label[3:]))

    genes = genes.loc[genes['chromo'] == chromo]
    overlaps = ~((genes['end'] < start) | (genes['start'] > end))
    genes = genes.loc[overlaps]
    genes['name'] = genes['name'].apply(find_name)

    return genes
Beispiel #23
0
    def _parse_stenogram_meta(self, response, meta_xs):
        """Collect stenogram metadata from the selector ``meta_xs``.

        Always sets 'source' and '_id'.  Depending on which of the module
        patterns match, also sets 'date', 'sitting_time', 'sitting_no',
        'session_no', 'session_season' and, when both a date and a session
        were found, 'session' ("<year> <season>").
        """
        source = self._get_source(response.url, 'p_id')
        meta = {'source': source, '_id': source['id']}

        date_match = meta_xs.re(date_re)
        if date_match:
            meta['date'] = date(int(date_match[0]),
                                month_names_map[date_match[1]],
                                int(date_match[2]))

        sitting_match = meta_xs.re(sitting_re)
        if sitting_match:
            meta['sitting_time'] = sitting_match[0].title()
            meta['sitting_no'] = sitting_match[1]
        else:
            # Fall back to the pattern that carries the number only
            sitting_no_match = meta_xs.re(sitting_no_re)
            if sitting_no_match:
                meta['sitting_no'] = sitting_no_match[0]

        session_match = meta_xs.re(session_re)
        if not session_match:
            return meta

        meta['session_no'] = roman.fromRoman(session_match[0])
        meta['session_season'] = session_match[1].title()
        if 'date' in meta:
            meta['session'] = '{} {}'.format(meta['date'].year,
                                             meta['session_season'])

        return meta
Beispiel #24
0
 def sort_collections(t):
     """Return the sort key for a collection entry whose first item is its title.

     Titles containing ": Chartae Latinae" get their last word, a Roman
     numeral, replaced by a zero-padded four-digit number; any other title
     is returned unchanged.
     """
     label = t[0]
     if ": Chartae Latinae" not in label:
         return label
     words = label.split()
     words[-1] = '{:04}'.format(roman.fromRoman(words[-1]))
     return ' '.join(words)
Beispiel #25
0
def nameclean(orig):
    """Reduce a title to a compact uppercase key for fuzzy comparison.

    Steps, in order: drop non-ASCII characters, upper-case, expand GOTY,
    remove a list of filler words, normalize ONE/TWO/40K/+ spellings,
    replace punctuation with spaces, convert stand-alone Roman numerals to
    digits, strip a trailing "s" from every word, drop repeated words
    (keeping first occurrences) and finally remove all whitespace.
    """
    name = orig.encode('ascii', errors='ignore').decode('ascii')
    name = name.upper().strip()
    name = name.replace('GOTY', 'GAME OF THE YEAR')
    # Raw strings for the parenthesis entries: they are regex escapes, and
    # '\(' in a plain string literal is an invalid escape sequence.
    ignoredphrases = [
        r'\(', r'\)', 'the', 'with', 'Early Access', 'bundle', 'and', '&', 'in',
        'vr', 'beta', 'Double Pack', 'Pack', 'Free to Play', 'Edition'
    ]
    for phrase in ignoredphrases:
        name = re.sub(r'\b{0}\b'.format(phrase), '', name, flags=re.IGNORECASE)
    name = re.sub(r'\bone\b', '1', name, flags=re.IGNORECASE)
    name = re.sub(r'\btwo\b', '2', name, flags=re.IGNORECASE)
    name = re.sub(r'\b40k\b', '40,000', name, flags=re.IGNORECASE)
    name = re.sub(r'\+', 'PLUS', name, flags=re.IGNORECASE)
    name = re.sub(r'[^\w\s]', ' ', name, flags=re.IGNORECASE)

    def _numeral_to_digits(found):
        # Group 1 / group 6 are the surrounding whitespace (or string edge),
        # group 2 is the numeral itself.
        if not found.group(0).strip():
            return ''
        return (found.group(1)
                + str(roman.fromRoman(found.group(2).strip()))
                + found.group(6))

    name = re.sub(
        r'(\A|\s)(\b(?=[MDCLXVI]+\b)M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\b)(\s|\Z)',
        _numeral_to_digits,
        name,
        flags=re.IGNORECASE)
    name = re.sub(r'(\w*)s\b', r'\1', name, flags=re.IGNORECASE)
    words = name.split()
    # De-duplicate while keeping the order of first occurrence
    name = ' '.join(sorted(set(words), key=words.index))
    name = re.sub(r'\s+', '', name, flags=re.IGNORECASE).strip()
    return name
Beispiel #26
0
def setValues(siglum):
    ''' Add an Arabic @value attribute to the Roman numerals of a text.

        Every <num> element that has no @value yet gets its Roman content
        converted: on success @value is set and @type becomes
        'guessedvalue', on failure @type becomes 'foo' and a message is
        printed.  'siglum' names the input file ('g' for '../xml/g.xml');
        the modified tree is written to '../xml/numerals-<siglum>.xml'.
        '''
    n = myconst.ns
    tree = etree.parse('../xml/%s.xml' % siglum)
    for number in tree.findall('.//t:num', n):
        if number.get('value') is not None:
            # @value was already set; leave the element untouched
            continue
        content = getRomanContent(number, checkallnumbers=False)
        if content == '':
            print('foo', number.text)
        numeral = content.upper()
        try:
            myvalue = roman.fromRoman(numeral)
        except roman.InvalidRomanNumeralError:
            print('Numero romano non parsabile:', numeral)
            number.set('type', 'foo')
        else:
            number.set('value', str(myvalue))
            number.set('type', 'guessedvalue')

    tree.write('../xml/numerals-%s.xml' % (siglum),
               encoding='UTF-8',
               method='xml',
               pretty_print=True,
               xml_declaration=True)
Beispiel #27
0
def token_features(y):
    """Classify a token by how much it looks like a section label.

    Returns:
        "0" for the caption words Table/TABLE/Figure/FIGURE/Fig./FIG.,
        "1" when the whole token is a dotted decimal number such as
        ``3``, ``3.`` or ``3.1.2``, or when the text before the first dot
        is a Roman numeral (case-insensitive),
        "2" when the token otherwise starts with an uppercase character,
        "3" in every other case, including an empty or whitespace-only
        token.
    """
    x = y.strip()
    parts = x.split('.', 2)
    pattern = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = pattern.match(x)
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."):
        return "0"
    if m and m.span() == (0, len(x)):
        return "1"
    try:
        roman.fromRoman(parts[0].upper())
        return "1"
    except Exception:
        # x[:1] instead of x[0]: an empty token must not raise IndexError.
        if x[:1].isupper():
            return "2"
        return "3"
Beispiel #28
0
def sgd_gff2dataframe(input_annot_gff_filename, ok_features_list, include_dubious=False):
    """
    Load an SGD annotation GFF file into a DataFrame.

    Only rows whose 'feature' is in ok_features_list are kept; rows on
    'chrMito' and '2-micron' are dropped and, unless include_dubious is
    set, so are rows whose 'Note' starts with 'Dubious'.  Chromosome names
    are rewritten from Roman to zero-padded Arabic form ('chrIV' becomes
    'chr04'), 'SGD:' is removed from 'dbxref' and 'start' is converted to
    int.
    """
    gff_df = gff2dataframe(input_annot_gff_filename)

    gff_df['Name'] = gff_df['Name'].str.strip()

    # keep the requested feature types only (e.g. ['CDS'])
    wanted_feature = gff_df['feature'].isin(ok_features_list)
    gff_df = gff_df[wanted_feature]

    # filter dubious entries and non-standard chromosomes
    excluded_chroms = ['chrMito', '2-micron']
    if include_dubious:
        idx = ~gff_df['chrom'].isin(excluded_chroms)
    else:
        not_dubious = gff_df['Note'].apply(lambda note: not str(note).startswith('Dubious'))
        idx = not_dubious & ~gff_df['chrom'].isin(excluded_chroms)
    gff_df = gff_df[idx]

    def to_numbered_chrom(label):
        # chrXX where XX is the chromosome number as a two-digit integer
        return 'chr' + str(fromRoman(str(label).replace('chr', ''))).zfill(2)

    gff_df['chrom'] = gff_df['chrom'].apply(to_numbered_chrom)

    # remove "SGD:" from the id column
    gff_df['dbxref'] = gff_df['dbxref'].apply(lambda ref: str(ref).replace('SGD:', ''))
    gff_df['start'] = gff_df['start'].apply(int)

    return gff_df
Beispiel #29
0
def arabicOrRomanToInt(s):
    """Return the integer value of ``s``, given as decimal digits or as a Roman numeral."""
    if re.match(r'\d+$', s) is not None:
        return int(s)
    return roman.fromRoman(s)
def replace_ordinal(token):
    """10. Ordinal numbers: spell out an ordinal token in words.

    A trailing "'s" or "s" is set aside and re-attached at the end.  The
    ordinal markers (th, st, nd, rd, ...) are removed; an all-digit
    remainder is spelled out as an ordinal, a Roman numeral is spelled out
    with a leading "the ", and anything else is returned as cleaned.
    """
    ordinal_markers = (
        "th", "TH", "Th", 'st', 'ST', 'nd', 'ND', 'rd', 'RD', ".", "º", ",",
        "ª"
    )
    res = token.strip()
    ends = ""
    if res.endswith("'s"):
        ends, res = "'s", res[:-2]
    if res.endswith("s"):
        ends, res = "s", res[:-1]
    for marker in ordinal_markers:
        res = res.replace(marker, "").strip()
    if res.isdigit():
        res = numstr2word(float(res), ordinal=True)
    else:
        # It might be a Roman numeral
        try:
            num = roman.fromRoman(res)
        except roman.InvalidRomanNumeralError:
            pass
        else:
            res = "the " + numstr2word(num, ordinal=True)
    return res + ends
Beispiel #31
0
def load():
    """Read the SNP frequency table from a hard-coded path and reshape it.

    Builds a count/depth table ``cd`` with (REP, GEN, READ) column levels
    indexed by (CHROM, POS), then tries to convert the chromosome labels
    from Roman numerals to integers.

    NOTE(review): the function ends with a bare ``return``, so it returns
    None and ``cd`` is discarded - confirm whether ``return cd`` was meant.
    NOTE(review): several calls pass the axis positionally
    (``pd.concat(..., 1, ...)``, ``sort_index(1)``) and ``map`` objects are
    used as indexers / column labels; recent pandas and Python 3 may reject
    these - confirm the targeted versions.
    """
    # Tab-separated input; chr/pos become the (CHROM, POS) index and the
    # first two remaining columns are dropped.
    a=pd.read_csv('/home/arya/storage/Data/Yeast/BurkeYeast/freq_tables/snps.txt',sep='\t').rename(columns={'chr':'CHROM','pos':'POS'}).set_index(['CHROM','POS']).iloc[:,2:]
    c=pd.Series(a.columns)
    # Columns whose name starts with 'F'
    founders=a[c[c.apply(lambda x:  x[0]=='F')]]
    # The row sums computed here are not assigned to anything.
    founders.loc[:,map(lambda x:'cov' in x,founders.columns)].sum(1)
    def toint(x):
        # int(x) when possible, otherwise x unchanged
        try:return int(x)
        except: return x

    # Columns whose name starts with 'maf' or 'cov'
    cd=a[c[c.apply(lambda x: 'maf' ==x[:3]) |c.apply(lambda x: 'cov' ==x[:3])]]
    # Column names are split on '_' into tuples, with 'cov' renamed to 'D'
    # and 'maf' to 'C'; numeric parts become ints.
    cd.columns=map(lambda x:tuple(map(lambda y: toint(y.replace('cov','D').replace('maf','C')) ,x.split('_'))), cd.columns)
    cd.columns=pd.MultiIndex.from_tuples(cd.columns)
    # C becomes round(C * D) as int; levels are reordered so that the C/D
    # key comes last.
    cd=pd.concat([(cd['C']* cd['D']).round().astype(int),cd['D']],1,keys=['C','D']).reorder_levels([1,2,0],axis=1)
    cd.columns.names=['REP','GEN','READ']

    # Columns containing 'anc', treated the same way and keyed as
    # generation 0, then repeated once per value of the first column level.
    ancestors=a[c[c.apply(lambda x: 'anc' in  x)]].rename(columns={'anc_maf':'C','anc_cov':'D'})
    ancestors['C']=(ancestors['C']*ancestors['D']).round().astype(int)
    ancestors=pd.concat([ancestors],1,keys=[0])
    R=cd.columns.levels[0]
    ancestors=pd.concat([ancestors for _ in R],1,keys=R)
    ancestors.columns.names=['REP','GEN','READ']
    cd=pd.concat([cd,ancestors],1).sort_index(1).sort_index()
    # GEN values 0/6/12/18 are replaced by 0/180/360/540.
    weekToGen={0:0, 6:180, 12:360, 18:540}
    cd=cd.T.reset_index()
    cd.GEN=cd.GEN.replace(weekToGen)
    cd=cd.set_index(['REP','GEN','READ']).T.sort_index(1).sort_index()
    # Best effort: convert CHROM labels such as 'chrIV' to integers; any
    # failure (including a missing roman module) is silently ignored.
    try:
        import roman
        cd=cd.reset_index()
        cd.CHROM=cd.CHROM.apply(lambda x:roman.fromRoman(x[3:]))
        cd=cd.set_index(['CHROM','POS'])
    except:
        pass
    return
def replace_cardinal(token):
    """5. Cardinal numbers: spell out a cardinal token in words.

    'U.S.', spaces, double quotes, commas, periods and colons are removed
    first; an empty remainder yields "".  A leading "-" becomes the prefix
    "minus ", a trailing "'s" is re-attached at the end, and a trailing
    "A" or "M" is dropped.  All-digit text and Roman numerals are passed to
    numstr2word; anything else is returned as cleaned.
    """
    for noise in ('U.S.', " ", '"', ',', '.', ':'):
        token = token.replace(noise, "")
    if not token:
        return ""

    minus = token.startswith("-")
    if minus:
        token = token[1:]
    token = token.replace('-', "")

    ends = ""
    if token.endswith("'s"):
        ends = "'s"
        token = token[:-2]
    elif token.endswith(("A", "M")):
        token = token[:-1].strip()

    if token.isdigit():
        res = numstr2word(token)
    else:
        # It might be a Roman numeral
        try:
            res = numstr2word(roman.fromRoman(token))
        except roman.InvalidRomanNumeralError:
            res = token

    if minus:
        res = "minus " + res
    return res + ends
Beispiel #33
0
def arabicOrRomanToInt(s):
    """Parse ``s`` as a decimal integer when it is all digits, else as a Roman numeral."""
    is_arabic = re.match(r'\d+$', s)
    return int(s) if is_arabic else roman.fromRoman(s)
Beispiel #34
0
 def numeric(x):
     """Map a number label to an integer.

     Labels starting with a digit give ``int(x) + Util.INTEGER_OFFSET``;
     other labels are read as Roman numerals, with 0 for invalid ones.
     """
     if not x[0].isdigit():
         try:
             return roman.fromRoman(x)
         except roman.RomanError:
             return 0
     return int(x) + Util.INTEGER_OFFSET
    def testSanity(self):
        """Round trip: converting n to a numeral and back gives n again"""

        for expected in range(1, 5000):
            round_tripped = roman.fromRoman(roman.toRoman(expected))
            self.assertEqual(expected, round_tripped)
Beispiel #36
0
    def _ordinal2word(wordsList, indice):
        """Spell out the English ordinal found at ``wordsList[indice]``.

           e.g. 1st --> first

           param wordsList: list of words
           param indice: position of the ordinal number in wordsList
           return the ordinal written in words, or the normalized
                  number unchanged when it is neither digits nor Roman
        """
        strNumber = NumberFormula._normalizeNumber(wordsList[indice])
        if strNumber.encode('utf-8') == u"1st".encode('utf-8'):
            return u"first"

        # Drop the ordinal suffix letters to keep the bare number
        bare = re.sub(u"[ndstrh]", "", strNumber)
        if bare.isdigit():
            return num2words(int(bare), ordinal=True)
        if NumberFormula._isRomanNumber(bare):
            # Roman to cardinal, then digits to ordinal
            cardinalNumber = fromRoman(bare.encode('utf-8'))
            return num2words(cardinalNumber, ordinal=True)

        print("newnumberis not digit!!!")
        return strNumber
def get_unit(title):
    """Split a title into a (unit, number) pair using the title patterns.

    The title is lower-cased and matched against every pattern returned by
    patterns.makeTitlePatterns().  Without any match, unit is the title
    with spaces, brackets and parentheses replaced by "_" and number is
    "1".  Both values are returned as strings.

    NOTE(review): Python 2 code (print statement, dict.iteritems).
    NOTE(review): the loop has no break, so when several patterns match,
    the last one iterated determines the result - confirm intent.
    """
    pattern = patterns.makeTitlePatterns()
    title = title.lower()
    # NOTE(review): this assignment is overwritten by the next line.
    unit = title.replace(" ", "_")
    unit = re.sub("[ \[\]\(\)]", "_", title)
    number = "1"
    for unitForm, unitPattern in pattern.iteritems():
        m = unitPattern.match(title)
        if m:
            if unitForm == "title_preamble":
                unit = m.group(1)
                number = "1"
            elif unitForm == "title_number":
                unit = "unspecified"
                number = m.group(1)
            elif unitForm == "title_unit_roman":
                unit = m.group(1)
                try:
                    number = str(roman.fromRoman(m.group(2).upper()))
                except roman.InvalidRomanNumeralError:
                    # Keep the raw text when it is not a valid numeral
                    print "Invalid Roman title number:", title, "unit:", unit, "number:", number
                    number = m.group(2)
            else:
                unit = m.group(1)
                number = m.group(2)
                # "art" is expanded to the full unit name
                if unit == "art":
                    unit = "article"
    return unit, number
def tb_heading_features(y):
    """Classify a token by how much it looks like a heading label.

    Returns:
        "0" for the caption words Table/TABLE/Figure/FIGURE/Fig./FIG.,
        "1" when the whole token is a dotted decimal number such as
        ``3``, ``3.`` or ``3.1.2``, or when the text before the first dot
        is a Roman numeral (case-insensitive),
        "2" when the token otherwise starts with an uppercase character,
        "3" in every other case, including an empty or whitespace-only
        token.
    """
    x = y.strip()
    parts = x.split('.', 2)
    pattern = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = pattern.match(x)
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."):
        return "0"
    if m and m.span() == (0, len(x)):
        return "1"
    try:
        roman.fromRoman(parts[0].upper())
        return "1"
    except Exception:
        # x[:1] instead of x[0]: an empty token must not raise IndexError.
        if x[:1].isupper():
            return "2"
        return "3"
Beispiel #39
0
    def maybe_normalize(self,
                        value,
                        mapping='',
                        roman_normalization=True,
                        mapping_append=True):
        """Apply the replacement rules, then optionally convert Roman numerals.

        With an empty ``mapping`` only ``self.default_mapping`` is used;
        otherwise ``mapping`` is applied before the defaults when
        ``mapping_append`` is true and after them when it is false.  Each
        rule is a (target, replacement) pair whose target is a plain string
        or a compiled pattern.  When ``roman_normalization`` is set, every
        numeral reported by ``self.get_roman_numbers`` is replaced by its
        Arabic value.  The result goes through ``self.clean_single_line``
        and has double spaces replaced by single ones.
        """
        if mapping == '':
            rules = self.default_mapping
        elif mapping_append:
            rules = mapping + self.default_mapping
        else:
            rules = self.default_mapping + mapping

        for rule in rules:
            target = rule[0]
            if type(target) == str:
                value = value.replace(target, rule[1])
            elif isinstance(target, Pattern):
                value = target.sub(rule[1], value)
            else:
                print('UNEXPECTED', type(target), target)

        if roman_normalization:
            for ro_before, ro_after, ro in self.get_roman_numbers(value):
                needle = ro_before + ro + ro_after
                try:
                    arabic = str(roman.fromRoman(ro))
                except roman.InvalidRomanNumeralError as ex:
                    # Report and leave this occurrence untouched
                    print(ex)
                    continue
                value = value.replace(needle, ro_before + arabic + ro_after)

        value = self.clean_single_line(value)

        return value.replace('  ', " ")
def token_features(y):
    """Classify a token by how much it looks like a section label.

    Returns:
        "5" for the marker ``$$$``,
        "0" for the caption words Table/TABLE/Figure/FIGURE/Fig.,
        "1" for section-number-like labels: a number from 1 to 20
        optionally followed by a dot, by a sub-number of at most 20, and
        by one more trailing dot; a Roman numeral of at most 20 optionally
        followed by a dot; or a single capital letter written as ``A``,
        ``(A)`` or ``A)``, optionally followed by a dot,
        "2" for any other token starting with an uppercase character,
        "3" when the text before the first dot is neither purely
        alphabetic nor purely digits (this includes the empty token),
        "4" otherwise.
    """
    x = y.strip()
    parts = x.split('.')
    if x == "$$$":
        return "5"
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"
    p_len = len(parts)
    head = parts[0]

    # Plain number: "7"
    if p_len == 1 and x.isdigit() and 1 <= int(x) <= 20:
        return "1"

    # Dotted number: "7.", "7.2" or "7.2."
    if p_len in (2, 3) and head.isdigit() and 1 <= int(head) <= 20:
        second_is_small_number = parts[1].isdigit() and int(parts[1]) <= 20
        if parts[1] == '' or second_is_small_number:
            if p_len == 2:
                return "1"
            if second_is_small_number and parts[2] == '':
                return "1"

    # Roman numeral or capital-letter label, optionally followed by a dot
    if p_len == 1 or (p_len == 2 and parts[1] == ''):
        try:
            val = roman.fromRoman(head.upper())
        except Exception:
            # Not a Roman numeral: fall through to the letter checks.
            val = None
        if val is not None and val <= 20:
            return "1"
        is_single_capital = len(head) == 1 and 'A' <= head <= 'Z'
        is_parenthesized_capital = (len(head) == 3 and head[0] == '('
                                    and head[2] == ')' and head[1].isalpha()
                                    and head[1].isupper())
        is_capital_with_paren = (len(head) == 2 and head[1] == ')'
                                 and head[0].isalpha() and head[0].isupper())
        if is_single_capital or is_parenthesized_capital or is_capital_with_paren:
            return "1"

    # x[:1] instead of x[0]: an empty token must not raise IndexError.
    if x[:1].isupper():
        return "2"
    if not (head.isalpha() or head.isdigit()):
        return "3"
    return "4"
Beispiel #41
0
 def get_spenser(self):
     '''
     Read 'data/spenser.txt' and return its sonnets as a dictionary.
     Keys are sonnet numbers, taken from lines that are Roman numerals;
     each value is the list of that sonnet's lines, every line being a
     list of lower-cased words with the characters :;.()?! removed.
     A non-empty text line before the first numeral raises KeyError,
     because no sonnet entry exists yet.
     '''
     filename = 'data/spenser.txt'
     num = 0
     sonnets = {}
     with open(filename) as f:
         for raw_line in f:
             line = re.sub('[:;.()?!]', '', raw_line.strip()).lower()
             try:
                 num = int(roman.fromRoman(line.upper()))
             except (ValueError, roman.InvalidRomanNumeralError):
                 # Not a heading: handled as a text line below
                 pass
             else:
                 # A numeral line opens a new sonnet
                 sonnets[num] = []
                 continue
             words = line.split()
             if words:
                 sonnets[num].append(words)
     return sonnets
def getSortedList_easy(lst):

    """
    Sort royal names such as "Louis IX" by name, then by ordinal value.
    Uses the pre-made module roman, for speed comparison and to check that
    the conversion is correct. The intermediate lists are printed.

    :param lst: list of strings, each "<name> <Roman numeral>"
    :return: list of the same royal names, sorted
    :raises TypeError: if any element of lst is not a string
    """

    # Fail with a clear error; previously this case ended in an
    # UnboundLocalError at the return statement.
    if not all(isinstance(x, str) for x in lst):
        raise TypeError("getSortedList_easy expects a list of strings")

    roman_to_int_convertedlst = []
    int_to_roman_convertedlst = []

    # split each entry into the name and its Roman number
    for name, ordinal in [royalname.split() for royalname in lst]:
        roman_to_int_convertedlst.append((name, roman.fromRoman(ordinal)))
    print("Converted from", lst, "\n"
          "to", roman_to_int_convertedlst)
    sortedlst = sorted(roman_to_int_convertedlst)
    print("Sorted", sortedlst)

    for name, integer in sortedlst:
        roman_numeral = roman.toRoman(integer)
        int_to_roman_convertedlst.append(name + " " + str(roman_numeral))

    return int_to_roman_convertedlst
Beispiel #43
0
 def replace_roman_numerals_in_match(m):
     """Rewrite the Roman numeral captured in group 2 of ``m`` as Arabic digits.

     Returns ``<group 1><number>:<group 7>``.  The whole matched text is
     returned unchanged when group 2 is empty or the conversion fails.
     """
     s = m.group(2)
     s = s.upper()
     if not s:
         # Previously this case fell off the end and returned None.
         return m.group(0)
     # "except Exception" instead of a bare "except": still best-effort,
     # without swallowing KeyboardInterrupt / SystemExit.
     try:
         return "%s%s:%s" % (m.group(1), roman.fromRoman(s), m.group(7))
     except Exception:
         return m.group(0)
Beispiel #44
0
def ps_event(numeral):
    """Render the event page for the event numbered by a Roman numeral.

    The numeral is upper-cased before parsing.  An invalid numeral yields
    the message "Invalid roman numeral!" with status 400.
    """
    try:
        number = roman.fromRoman(numeral.upper())
    except roman.InvalidRomanNumeralError:
        return "Invalid roman numeral!", 400
    else:
        return flask.render_template(
            'event.html', event=ps_data.get_ps_event_by_number(number))
Beispiel #45
0
def parse_token(token):
    """Normalise one token of an arithmetic expression.

    * A token that is a substring of ``'()+-*/'`` is returned unchanged.
    * A token made only of digits and commas is returned without commas.
    * A token made only of Roman-numeral letters is converted to its
      decimal value, returned as a string.
    * Anything else is logged as unknown and a replacement is read from
      standard input.
    """
    operator_chars = '()+-*/'
    if token in operator_chars:
        return token

    # Plain decimal number, possibly with thousands separators.
    if all(ch in '0123456789,' for ch in token):
        return token.replace(',', '')

    # Upper-case Roman numeral.
    if all(ch in 'IVXLCDM' for ch in token):
        decimal_value = roman.fromRoman(token)
        return str(decimal_value)

    log.warning('Unknown token: ' + token)
    return raw_input()
Beispiel #46
0
    def _extract_charge(arg: str):
        """
        Receive a `str` representing an element, isotope, or ion.
        Return a `tuple` containing a `str` that should represent an
        element or isotope, and either an `int` representing the
        charge or `None` if no charge information is provided.  Raise
        an `~plasmapy.utils.InvalidParticleError` if charge information
        is inputted incorrectly.
        """

        invalid_charge_errmsg = (f"Invalid charge information in the particle string '{arg}'.")

        if arg.count(' ') == 1:  # Cases like 'H 1-' and 'Fe-56 1+'
            isotope_info, charge_info = arg.split(' ')

            sign_indicator_only_on_one_end = (
                    charge_info.endswith(('-', '+')) ^
                    charge_info.startswith(('-', '+')))

            just_one_sign_indicator = (
                    (charge_info.count('-') == 1 and
                     charge_info.count('+') == 0) or
                    (charge_info.count('-') == 0 and
                     charge_info.count('+') == 1))

            if not sign_indicator_only_on_one_end and just_one_sign_indicator:
                raise InvalidParticleError(invalid_charge_errmsg) from None

            charge_str = charge_info.strip('+-')

            try:
                if roman.romanNumeralPattern.match(charge_info):
                    Z_from_arg = roman.fromRoman(charge_info) - 1
                elif '-' in charge_info:
                    Z_from_arg = - int(charge_str)
                elif '+' in charge_info:
                    Z_from_arg = int(charge_str)
                else:
                    raise InvalidParticleError(invalid_charge_errmsg) from None
            except ValueError:
                raise InvalidParticleError(invalid_charge_errmsg) from None

        elif arg.endswith(('-', '+')):  # Cases like 'H-' and 'Pb-209+++'
            char = arg[-1]
            match = re.match(f"[{char}]*", arg[::-1])
            Z_from_arg = match.span()[1]
            isotope_info = arg[0:len(arg) - match.span()[1]]

            if char == '-':
                Z_from_arg = -Z_from_arg
            if isotope_info.endswith(('-', '+')):
                raise InvalidParticleError(invalid_charge_errmsg) from None
        else:
            isotope_info = arg
            Z_from_arg = None

        return isotope_info, Z_from_arg
    def _roman2word(strNumber):
        """Spell out a Roman numeral as a written word.

           param strNumber: a roman number; it is encoded to UTF-8
                            before being parsed
           return the 'written' form of the number, as produced by
                  NumberFormula._cardinal2word
        """
        encoded = strNumber.encode('utf-8')
        value = fromRoman(encoded)
        return NumberFormula._cardinal2word(value)
Beispiel #48
0
 def markup(self):
     """Collect the text lines of ``self.dom`` with act/scene numbers.

     ``descend`` is called on ``self.dom`` and its result is iterated as a
     mapping: each key is turned into a dict (``dict(key)``) and the mapped
     value is stored under ``"textString"``.  Lines whose text contains no
     word characters are dropped.

     When the line has an ``"ACT"`` / ``"SCENE"`` entry, the Roman numeral
     found in it is stored as an integer under ``"actNumber"`` /
     ``"sceneNumber"``.

     Returns the list of line dicts.
     """
     # Named ``lines_by_key`` rather than ``all`` to avoid shadowing the builtin.
     lines_by_key = descend(self.dom, stackOfTags=[], seenSoFar=dict(), depth=1)
     returnable = []
     for key in lines_by_key:
         line = dict(key)
         line["textString"] = lines_by_key[key]
         if re.search(r"\w+", line["textString"]):
             try:
                 line["actNumber"] = roman.fromRoman(re.findall(r"(?:ACT|Act) +(\w+)", line["ACT"])[0])
             except KeyError:
                 # No ACT heading recorded for this line.
                 pass
             try:
                 line["sceneNumber"] = roman.fromRoman(re.findall(r"(?:SCENE|Scene) +([IXVL]+)", line["SCENE"])[0])
             except Exception:
                 # Scene numbering is best effort (missing key, no numeral,
                 # invalid numeral); unlike a bare ``except`` this lets
                 # KeyboardInterrupt/SystemExit propagate.
                 pass
             returnable.append(line)
     return returnable
Beispiel #49
0
    def _roman2word(strNumber):
        """Spell out a Roman numeral as a written word.

           param strNumber: a roman number; it is encoded to UTF-8
                            before being parsed
           return the 'written' form of the number, as produced by
                  num2words
        """
        encoded = strNumber.encode('utf-8')
        return num2words(fromRoman(encoded))
Beispiel #50
0
 def _to_int(self, value):
     if isinstance(value, int):
         return value
     elif isinstance(value, basestring) and self.is_roman(value.upper()):
         return roman.fromRoman(value.upper())
     elif isinstance(value, RomanNumeral):
         return value._val
     else:
         raise ValueError("Value must be a valid roman numeral, a string"
                          " representing one or an integer: '{0}'"
                          .format(value))
    def parse_worked_on(self, text_or_node, submodule):
        """Record the "worked on" entries of one submodule.

        ``submodule`` is a Roman numeral; it is converted to a zero-based
        index (numeral value minus one).

        ``text_or_node`` is either a plain string, used as the single entry,
        or a node whose ``traverse`` method yields the lists/enumerations it
        contains; the items of each such list are collected.

        The collected strings are handed to ``self._parse_worked_strings``
        together with the submodule index.
        """
        submodule = roman.fromRoman(submodule) - 1

        if isinstance(text_or_node, basestring):
            worked_strings = [text_or_node]
        else:
            worked_strings = []
            for enumeration in text_or_node.traverse(utils.is_list_or_enumeration):
                # Parse the list that was found. The original passed the
                # whole ``text_or_node`` on every iteration, which ignored
                # the loop variable and repeated the items once per list.
                worked_strings += utils.parse_list_items(enumeration)

        self._parse_worked_strings(worked_strings, submodule)
Beispiel #52
0
 def _compute_sortkey(self):
     """Build the string key used to sort this volume.

     The volume name is matched against the pattern registered for its
     type in ``_REGEX_MAPPING``.  Unless the name is ``"R"``, group 1 of
     the match is read as a Roman numeral.  The key is:

     * ``1_<NN>_<group 2>`` for the first series,
     * ``2_<NN>_<group 2>`` for the second series,
     * ``3_<NN>`` for supplements,
     * ``4`` for every other type,

     where ``<NN>`` is the numeral zero-padded to two digits.
     """
     found = _REGEX_MAPPING[self.type].search(self.name)
     volume_number = roman.fromRoman(found.group(1)) if self.name != "R" else 0

     if self.type == VolumeType.FIRST_SERIES:
         return f"1_{volume_number:02d}_{found.group(2)}"
     if self.type == VolumeType.SECOND_SERIES:
         return f"2_{volume_number:02d}_{found.group(2)}"
     if self.type == VolumeType.SUPPLEMENTS:
         return f"3_{volume_number:02d}"
     return "4"
def split_name_and_submodule(name):
    """Split ``name`` into its base name and zero-based submodule indices.

    ``name`` is matched with ``submodule_split_re``; group 1 is the base
    name (whitespace-stripped) and group 2 the submodule specification.
    When the specification is empty the result is ``[base_name]``.
    Otherwise it is split with ``submodules_split_re`` and every piece is
    read as a Roman numeral (case-insensitively) and reduced by one, giving
    ``[base_name, index, index, ...]``.
    """
    groups = list(submodule_split_re.match(name).groups())
    base_name = groups[0].strip()
    submodule_spec = groups[1]

    if len(submodule_spec) == 0:
        return [base_name]

    indices = []
    for numeral in submodules_split_re.split(submodule_spec):
        indices.append(roman.fromRoman(numeral.upper()) - 1)
    return [base_name] + indices
	def rewrite_roman_numerals(self, text):
		"""Replace Roman numerals that follow a reference unit with decimals.

		A reference is ``<unit> <numeral>``, where ``<unit>`` is one of
		``self.refUnits`` and ``<numeral>`` is matched by
		``self.romanPattern`` (its capture is read as group 2 of the
		combined pattern).  Only the numeral inside each matched reference
		is rewritten; a numeral that is not valid is left as it is.

		Returns ``text`` unchanged when there are no reference units.
		"""
		if not self.refUnits:
			return text
		romanRef = re.compile(r"\b(" + "|".join(self.refUnits) + ") " + self.romanPattern + r"\b")

		def to_decimal(match):
			romanNumeral = match.group(2)
			if not romanNumeral:
				return match.group(0)
			try:
				number = str(roman.fromRoman(romanNumeral.upper()))
			except roman.InvalidRomanNumeralError:
				return match.group(0)
			# Splice the number over the numeral's own span, so the unit and
			# any other text matched by the pattern is kept verbatim.
			whole = match.group(0)
			start = match.start(2) - match.start()
			end = match.end(2) - match.start()
			return whole[:start] + number + whole[end:]

		# Substituting through the reference pattern touches only numerals
		# that follow a unit.  The original re-substituted the bare numeral
		# across the whole text, which also rewrote unrelated words such as
		# the pronoun "I".
		return romanRef.sub(to_decimal, text)
Beispiel #55
0
def traiter_nombres(chaine):
	"""Convert Roman-numeral pieces of ``chaine`` to decimal numbers.

	Spaces are removed first.  The string is then walked character by
	character: each time a character from the module-level ``seps`` is met,
	the piece since the previous separator is upper-cased and, if
	``fromRoman`` accepts it, replaced by its decimal value.  While no
	separator has been seen yet, the whole string is also tried as a single
	Roman numeral.  The result is finally passed through
	``supprimer_sep_nombre`` and returned.

	Conversion failures are silently ignored (bare ``except``).
	"""
	chaine = chaine.replace(' ','')
	global seps
	i = -1
	sep = 0 # start index of the current piece (one past the last separator seen)

	# 1. convert the Roman numeral to an integer
	# NOTE(review): the loop iterates over the string object bound at loop
	# start, so ``i`` counts positions in that original string, while the
	# slices below are taken from ``chaine`` after it may have been
	# reassigned to a string of different length. After a replacement that
	# changes the length, ``sep``/``i`` look misaligned -- confirm.
	for c in chaine: # walk through the string
		i = i + 1
		if c in seps:
			morceau = chaine[sep:i].upper() 		# the piece to process
			try:
				chaine = chaine[:sep] + str(fromRoman(morceau)) + chaine[i:]
			except:
				pass
			sep = i+1
		# No separator met so far: try the whole string as one numeral.
		# NOTE(review): this is retried on every character until the first
		# separator, and is not upper-cased unlike the pieces above.
		if sep == 0:
			try:
				chaine = str(fromRoman(chaine))
			except:
				pass 
		
	# NOTE(review): the piece after the last separator is never converted
	# by the loop above -- verify whether that is intended.
	chaine = supprimer_sep_nombre(chaine)
	return chaine	
    def parse_dependencies(self, text_or_node, submodule):
        """Record the dependency strings of one submodule.

        ``submodule`` is a Roman numeral; it is converted to a zero-based
        index (numeral value minus one).

        ``text_or_node`` is either a plain string, used as the single
        dependency, or a node whose ``traverse`` method yields the
        lists/enumerations it contains; the items of each such list are
        collected.

        The collected strings are appended to
        ``self.dependencies[<index>]``, creating the entry if needed.
        """
        submodule = roman.fromRoman(submodule) - 1

        if isinstance(text_or_node, basestring):
            dep_strings = [text_or_node]
        else:
            dep_strings = []
            for enumeration in text_or_node.traverse(utils.is_list_or_enumeration):
                # Parse the list that was found. The original passed the
                # whole ``text_or_node`` on every iteration, which ignored
                # the loop variable and repeated the items once per list.
                dep_strings += utils.parse_list_items(enumeration)

        if submodule not in self.dependencies:
            self.dependencies[submodule] = []
        self.dependencies[submodule] += dep_strings
Beispiel #57
0
def token_features(tok_input):
    """Classify a token and return its feature code as a one-character string.

    The token is stripped of surrounding whitespace and tested in order:

    * ``"6"`` -- special section title (Abstract, References,
      Acknowledgement(s), in the listed capitalisations)
    * ``"5"`` -- the special symbol ``$$$``
    * ``"0"`` -- table/figure heading word
    * ``"1"`` -- section-number-like token: ``N``, ``N.``, ``N.M`` or
      ``N.M.`` with ``1 <= N <= 20`` and ``M <= 20``; a Roman numeral
      with value at most 20 (optionally followed by a dot); or a single
      upper-case letter written ``A``, ``(A)`` or ``A)`` (optionally
      followed by a dot)
    * ``"2"`` -- token starting with an upper-case character
    * ``"3"`` -- first dot-separated part is neither alphabetic nor numeric
      (this includes an empty or whitespace-only token)
    * ``"4"`` -- anything else
    """
    tok = tok_input.strip()
    if not tok:
        # An empty token used to crash on ``tok[0]`` further down; it has no
        # alphabetic or numeric first part, so it falls in class "3".
        return "3"

    # Mark special sections.  "ACKNOWLEDGEMENTs" is kept for backward
    # compatibility; "ACKNOWLEDGEMENTS" is the spelling it was meant to be.
    section_titles = (
        "Abstract", "ABSTRACT",
        "Acknowledgement", "ACKNOWLEDGEMENT",
        "Acknowledgements", "ACKNOWLEDGEMENTS", "ACKNOWLEDGEMENTs",
        "Reference", "REFERENCE",
        "References", "REFERENCES",
    )
    if tok in section_titles:
        return "6"
    # Mark special symbol '$$$'
    if tok == "$$$":
        return "5"
    # Mark table/figure headings
    if tok in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"

    parts = tok.split('.')
    p_len = len(parts)
    head = parts[0]

    # Digit tokens: "N"
    if p_len == 1 and tok.isdigit() and 1 <= int(tok) <= 20:
        return "1"
    # Digit tokens: "N.", "N.M" and "N.M."
    if p_len in (2, 3) and head.isdigit() and 1 <= int(head) <= 20:
        sub = parts[1]
        sub_is_small_number = sub.isdigit() and int(sub) <= 20
        if p_len == 2 and (sub == '' or sub_is_small_number):
            return "1"
        if p_len == 3 and sub_is_small_number and parts[2] == '':
            return "1"

    # Roman numerals and single-letter enumerators, optionally dot-terminated
    if p_len == 1 or (p_len == 2 and parts[1] == ''):
        try:
            if roman.fromRoman(head.upper()) <= 20:
                return "1"
        except roman.InvalidRomanNumeralError:
            pass
        is_bare_letter = len(head) == 1 and 'A' <= head <= 'Z'
        is_parenthesised_letter = (
            len(head) == 3 and head[0] == '(' and head[2] == ')'
            and head[1].isalpha() and head[1].isupper())
        is_letter_with_paren = (
            len(head) == 2 and head[1] == ')'
            and head[0].isalpha() and head[0].isupper())
        if is_bare_letter or is_parenthesised_letter or is_letter_with_paren:
            return "1"

    # Handling capitalizations
    if tok[0].isupper():
        return "2"
    if not (head.isalpha() or head.isdigit()):
        return "3"
    return "4"
Beispiel #58
0
def solve(ques):
    """Evaluate the arithmetic expression on the left of ``=`` in ``ques``.

    Commas are removed, the expression is split on the operators and
    parentheses ``- + * / ( )``, and each operand is normalised:

    * an operand starting with upper-case letters is read as a Roman numeral,
    * an operand starting with lower-case letters is read as a number
      written in English (via ``text2int``),
    * anything else is kept as is.

    Returns the evaluated result followed by ``"\\r\\n"``.
    """
    # We only need lhs
    ques = ques.split("=")[0]
    # We don't need commas (the original called the non-existent
    # ``str.repace`` and raised AttributeError on every call)
    ques = ques.replace(',', '')
    parsed = []
    # Let's separate parts between operators; the capturing group keeps the
    # operators themselves in the result.
    for g in re.split(r'([-+*/()])', ques):
        g = g.strip()
        # If part is a roman numeral, convert it to decimal
        if re.match('[A-Z]+', g):
            parsed.append("%s" % roman.fromRoman(g))
        # If part is in plain english, convert it to decimal
        elif re.match('[a-z]+', g):
            parsed.append("%s" % text2int(g))
        else:
            parsed.append(g)
    expression = "".join(parsed)
    # NOTE(security): eval executes arbitrary Python. Parts that start with
    # a digit or symbol are passed through unchecked, so this must not be
    # fed untrusted input without replacing eval by a real expression parser.
    return "%s\r\n" % (eval(expression))
def decide_REIA_or_REWL(re_sub):
    """Choose between the ``REIA`` and ``REWL`` template for a matched link.

    The text matched by ``re_sub`` is searched with ``FIT`` and the template
    arguments are built from that result with ``build_arg``.  ``REIA`` is
    returned when group 1 is ``'archive'``.  Otherwise the volume number
    (group 3, a Roman numeral) is inspected together with the prefix
    (group 2), the appendix letter (group 4) and the half-volume (group 5),
    and ``REIA`` is returned for:

    * plain volumes I to XI (no prefix, no appendix letter),
    * I A,1 and II A,1,
    * the supplements S I, S II and S III;

    every other volume gets ``REWL``.
    """
    matched_text = re_sub.group(0)
    searcher = FIT.search(matched_text)
    arg1, arg2, _unused = build_arg(searcher)

    reia = '{{REIA|%s|%s}}' % (arg1, arg2)
    if searcher.group(1) == 'archive':  # archive
        return reia

    # wikilivre
    arabic = roman.fromRoman(searcher.group(3))
    subl = searcher.group(2)
    app = searcher.group(4)
    halfband = searcher.group(5)

    # all between I and XI
    plain_early_volume = app is None and subl is None and arabic < 12
    # I A,1 and II A,1
    first_half_of_a_volume = (
        subl is None and app == 'A' and arabic < 3 and halfband == '1')
    # S I, S II, S III
    early_supplement = (
        subl == 'S' and app is None and arabic < 4 and halfband is None)

    if plain_early_volume or first_half_of_a_volume or early_supplement:
        return reia
    return '{{REWL|%s|%s}}' % (arg1, arg2)  # rest