def make_build_alignment(c_alns, mc_alns, hmc_alns, strand, threshold, max_assignments):
    # translation tables for methylation (C unchanged, 5-mC -> E, 5-hmC -> O)
    C_trans_table = string.maketrans("C", "C")
    mC_trans_table = string.maketrans("C", "E")
    hmC_trans_table = string.maketrans("C", "O")

    C_table = collect_assignments(c_alns, strand, threshold, max_assignments, C_trans_table)
    mC_table = collect_assignments(mc_alns, strand, threshold, max_assignments, mC_trans_table)
    hmC_table = collect_assignments(hmc_alns, strand, threshold, max_assignments, hmC_trans_table)

    nb_c_assignments = C_table.shape[0] if C_table is not None else "None"
    nb_mc_assignments = mC_table.shape[0] if mC_table is not None else "None"
    nb_hmc_assignments = hmC_table.shape[0] if hmC_table is not None else "None"

    print("[buildAlignments] NOTICE: I found {C} C-assignments, {mC} mC-assignments, and {hmC} hmC-assignments "
          "for strand {strand}"
          "".format(C=nb_c_assignments, mC=nb_mc_assignments, hmC=nb_hmc_assignments, strand=strand),
          file=sys.stderr)

    tables = [table for table in (C_table, mC_table, hmC_table) if table is not None]

    return pd.concat(tables)
def __rhymeQuotient(comment):
    """
    Calculate the "rhymy-ness" of a comment, basically how many
    line-ending words are similar to other line-ending words
    """
    # strip punctuation and numbers
    comment = comment.translate(string.maketrans("", ""), string.punctuation)
    comment = comment.translate(string.maketrans("", ""), string.digits)
    lines = comment.split('\n')
    lastWords = []
    for line in lines:
        words = line.split()
        if len(words) >= 1:
            lastWords.append(words[-1])
    # can't have rhymes if we have fewer than two lines
    if len(lastWords) < 2:
        return 0
    # print lastWords
    # now score each word by similarity with a following word
    for i in range(len(lastWords)):
        best = 0
        for j in range(i + 1, len(lastWords)):
            best = max(best, TimesComments.__sharedLetters(lastWords[i], lastWords[j]))
        lastWords[i] = best
    lastWords = map(lambda x: 5 if x >= 2 else x, lastWords)
    return sum(lastWords) / (len(lastWords) - 1)
def unlisted_words(sample, reference):
    # Clean and cut the sample string: remove punctuation/symbols, lowercase,
    # then split into a list on spaces or newlines.
    s = (sample.translate(string.maketrans("", ""), string.punctuation)).lower().split()
    # Clean and cut the reference string the same way.
    r = (reference.translate(string.maketrans("", ""), string.punctuation)).lower().split()
    # Collect the sample words missing from the reference; the set() round-trip
    # removes duplicates before converting back to a list.
    return list(set(w for w in s if w not in r))
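# A quick usage sketch for unlisted_words above (Python 2, since string.maketrans
# on str is the Python 2 API). The sample texts are made up for illustration;
# the result order may vary because of the set() round-trip.
import string

sample = "The quick, brown fox!"
reference = "the lazy fox"
print unlisted_words(sample, reference)   # ['quick', 'brown'] (order may vary)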
def checkreserveduser(user):
    if os.path.isdir(os.path.join(config.datadir, user)):
        return
    if user.lower() != user:
        errorexit('Username should be lowercase.')
    normalized = user.lower()
    if os.path.isdir(os.path.join(config.datadir, normalized)):
        errorexit('Username is reserved.')
    # map look-alike digits to letters two ways ('1' -> 'i' and '1' -> 'l')
    # and check those spellings of the name as well
    normalizedi = normalized.translate(string.maketrans('013456789', 'oieasbtbg'))
    if normalizedi != normalized and os.path.isdir(
            os.path.join(config.datadir, normalizedi)):
        errorexit('Username is reserved.')
    normalizedl = normalized.translate(string.maketrans('013456789', 'oleasbtbg'))
    if normalizedl != normalized and os.path.isdir(
            os.path.join(config.datadir, normalizedl)):
        errorexit('Username is reserved.')
    with open(os.path.join(progpath, 'bad-words.txt')) as f:
        badwords = f.read().splitlines()
    if any(word in badwords for word in [normalized, normalizedi, normalizedl]):
        errorexit('Username is reserved.')
    return
def solve_partial(w, h, pattern, answer, fix_num, solve_num, reset_loop_count=RESET_LOOP_COUNT):
    # already-fixed cells become walls ('='); cells beyond the current solving
    # window become wildcards ('*')
    trans_str_wall = answer[:fix_num]
    trans_table_wall = string.maketrans(trans_str_wall, '=' * len(trans_str_wall))
    trans_str_asta = answer[fix_num + solve_num:-1].replace('=', '')
    trans_table_asta = string.maketrans(trans_str_asta, '*' * len(trans_str_asta))

    pattern_rep = pattern.translate(trans_table_wall)
    pattern_rep = pattern_rep.translate(trans_table_asta)
    answer_rep = answer.translate(trans_table_wall)
    answer_rep = answer_rep.translate(trans_table_asta)

    ####### debug #######
    print '--------- pattern_rep'
    print_pattern(w, h, pattern_rep)
    print '--------- answer_rep'
    print_pattern(w, h, answer_rep)
    ####### debug #######

    move = solve_all(w, h, pattern_rep, answer_rep, reset_loop_count)

    ####### debug #######
    if move:
        pattern_work = create_pattern(w, h, pattern, move)
        print '--------- succeeded'
        print_pattern(w, h, pattern_work)
    else:
        print '--------- not succeeded'
    ####### debug #######

    return move
def make_table(do_encrypt=True):
    original = ''.join([chr(i) for i in range(0, 128)])
    encrypted = ''.join([chr(i * 2 % 127) for i in range(0, 128)])
    if do_encrypt:
        return string.maketrans(original, encrypted)
    else:
        return string.maketrans(encrypted, original)
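# Minimal round-trip sketch for make_table above (Python 2). Note that chr(0)
# and chr(127) both map to chr(0), so the decrypt table cannot distinguish
# them; printable ASCII survives the round trip.
import string

enc = make_table(do_encrypt=True)
dec = make_table(do_encrypt=False)
print 'hello'.translate(enc).translate(dec)   # 'hello'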
def build_sample_ids_transtable():
    """Build translation table for sample ids being MIENS compliant"""
    all_chars = "".join([chr(i) for i in range(128)])
    valid_sample_id_chars = letters + digits + "."
    non_valid_sample_id_chars = all_chars.translate(maketrans("", ""), valid_sample_id_chars)
    trans_table = maketrans(non_valid_sample_id_chars, "." * len(non_valid_sample_id_chars))
    return trans_table
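# Usage sketch for the table above (Python 2; assumes the snippet's implied
# `from string import letters, digits, maketrans`): every character outside
# [A-Za-z0-9.] is squashed to '.'.
from string import letters, digits, maketrans

table = build_sample_ids_transtable()
print "sample id #7!".translate(table)   # 'sample.id..7.'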
def _translator(frm='', to='', delete='', keep=None):
    """generate a translator which can be called on a string for substituting
    ``frm`` to ``to``, deleting ``delete`` and keeping the ``keep``. This
    function is only for Python 2, because the ``maketrans`` function returns
    a dict in Python 3.

    examples:
        >>> trans = _translator('1234567890', '!@#$%^&*()')
        >>> trans('a1b2c3d4e5f6g7')
        'a!b@c#d$e%f^g&'
        >>> trans = _translator('1234567890', '!@#$%^&*()', '123')
        >>> trans('a1b2c3d4e5f6g7')
        'abcd$e%f^g&'
        >>> trans = _translator('1234567890', '!@#$%^&*()', '123', '345')
        >>> trans('a1b2c3d4e5f6g7')
        '$%'
        >>> trans = _translator('1234567890', '!@#$%^&*()', '123', '345ab')
        >>> trans('a1b2c3d4e5f6g7')
        'ab$%'
    """
    if len(to) == 1:
        to = to * len(frm)
    table = string.maketrans(frm, to)
    if keep is not None:
        all_chars = string.maketrans('', '')
        delete = all_chars.translate(all_chars, keep.translate(all_chars, delete))

    def translate(s):
        return s.translate(table, delete)
    return translate
def generateHashtagSpecificVocabulary(self):
    wordsMappedToHashtags = {}
    for tweet in self.trainSet:
        words = []
        hashtags = []
        for word in tweet.split():
            if word.startswith('#') and len(word) > 2:
                word = word.lower().translate(string.maketrans("", ""), string.punctuation)  # remove punctuation
                hashtags.append(word)
                words.append(word)
            else:
                if '@' in word:
                    continue
                if word in self.stopWords:
                    continue
                word = word.lower().translate(string.maketrans("", ""), string.punctuation)  # remove punctuation
                words.append(word)
        for hashtag in hashtags:
            if hashtag not in wordsMappedToHashtags:
                wordsMappedToHashtags[hashtag] = {}
            for word in words:
                if word not in wordsMappedToHashtags[hashtag]:
                    wordsMappedToHashtags[hashtag][word] = 1.0
                else:
                    wordsMappedToHashtags[hashtag][word] += 1.0
    return wordsMappedToHashtags
def scrape_links_and_wordlistify(links, lower=False, verbose=1):
    import nltk
    import requests
    import string
    raw = ''
    wordlist = {}
    for site in links:
        try:
            if verbose == 1:
                print '[+] fetching data from: ', site
            if site.find('http://pastebin.com/') == 0:
                raw = requests.get(site.replace('http://pastebin.com/', 'http://pastebin.com/raw.php?i=')).content
            else:
                raw = requests.get(site).content
            if lower == False:
                l = string.translate(nltk.clean_html(raw), string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
            else:
                l = string.lower(nltk.clean_html(raw))
                l = string.translate(l, string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
        except:
            if verbose == 1:
                print '[-] Skipping url: ', site
    return wordlist
def generateCounts(self):
    wordCounts = {}
    hashtagCounts = {}
    for tweet in self.trainSet:
        hashtags = []
        for word in tweet.split():
            if word.startswith('#') and len(word) > 2:
                word = word.lower().translate(string.maketrans("", ""), string.punctuation)  # remove punctuation
                hashtags.append(word)
                if word not in wordCounts:
                    wordCounts[word] = 1
                else:
                    wordCounts[word] += 1
            else:
                if '@' in word:
                    continue
                if word in self.stopWords:
                    continue
                word = word.lower().translate(string.maketrans("", ""), string.punctuation)  # remove punctuation
                if word not in wordCounts:
                    wordCounts[word] = 1
                else:
                    wordCounts[word] += 1
        for hashtag in hashtags:
            if hashtag not in hashtagCounts:
                hashtagCounts[hashtag] = 1.0
            else:
                hashtagCounts[hashtag] += 1.0
    return wordCounts, hashtagCounts
def ROT13(test_string):
    """Return an encrypted string offset by 13.
    Encryption does not impact spaces or special characters."""
    shifted_cipher_lower = string.ascii_lowercase[13:] + string.ascii_lowercase[:13]
    shifted_cipher_upper = string.ascii_uppercase[13:] + string.ascii_uppercase[:13]
    transtable_lower = string.maketrans(string.ascii_lowercase, shifted_cipher_lower)
    # print transtable_lower
    transtable_upper = string.maketrans(string.ascii_uppercase, shifted_cipher_upper)
    # print transtable_upper
    encrypted_text = []
    for letter in test_string:
        if letter.islower():
            encrypted_text.append(letter.translate(transtable_lower))
        else:
            encrypted_text.append(letter.translate(transtable_upper))
    final_encryption = "".join(encrypted_text)
    return final_encryption
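# Sketch of ROT13 above in action (Python 2): applying it twice restores the
# input, since 13 + 13 = 26 wraps the alphabet.
import string

print ROT13("Hello, World!")           # 'Uryyb, Jbeyq!'
print ROT13(ROT13("Hello, World!"))    # 'Hello, World!'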
def __init__(self):
    self.currentTroll = random.randint(0, 10)
    self.trollCounter = 0
    self.trolls = [
        # aradia
        {'prefix': '', 'replace': lambda x: x.translate(string.maketrans('oo', '00'))},
        # terezi
        {'prefix': '', 'replace': lambda x: x.upper().translate(string.maketrans('AIE', '413'))},
        # tavros
        {'prefix': '', 'replace': lambda x: x.title().swapcase()},
        # sollux
        {'prefix': '', 'replace': lambda x: x.replace('s', '2').replace('S', '2').replace('i', 'ii').replace('I', 'II')},
        # karkat
        {'prefix': '', 'replace': lambda x: x.upper()},
        # nepeta
        {'prefix': ':33 <', 'replace': lambda x: x.replace('ee', '33').replace('EE', '33')},
        # kanaya
        {'prefix': '', 'replace': lambda x: x.capitalize()},
        # vriska
        {'prefix': '', 'replace': lambda x: x.translate(string.maketrans('bB', '88')).replace('ate', '8')},
        # equius
        {'prefix': 'D -->', 'replace': lambda x: x.translate(string.maketrans('xX', '%%'))},
        # gamzee TODO need a full func
        # eridan
        {'prefix': '', 'replace': lambda x: x.replace('w', 'ww').replace('v', 'vv').replace('W', 'WW').replace('V', 'VV')},
        # feferi
        {'prefix': '', 'replace': lambda x: x.replace('h', ')(').replace('H', ')(').replace('E', '-E')},
    ]
def tm():
    import sys
    import nltk
    import string
    input_file_name = raw_input("Please enter the input file name: ")
    input_path = raw_input("Please enter the input path: ")
    output_file_name = raw_input("Please enter the output file name: ")
    print '\nPlease note that the above entered filename would be used as',
    print 'a prefix for the entire set of documents to be generated.\n'
    output_path = raw_input("Please enter the output path: ")
    with open(input_path + '\\' + input_file_name + '.txt', 'r') as f:
        para = []
        data = f.read()
        selected = 0
        notselect = 0
        sentences = data.split("\n\n")
        print "Total # of paragraphs", len(sentences)
        for x in xrange(len(sentences)):
            cond = sentences[x].endswith(".")
            if cond:
                cnt = sentences[x].count(".")
            else:
                cnt = sentences[x].count(".") + 1
            if cnt > 5:
                # print "paragraph ", x + 1, "is selected"
                selected += 1
                sentences[x] = '@' + sentences[x].lower()
                sentences[x] = sentences[x].translate(string.maketrans("", ""), string.digits)
                sentences[x] = sentences[x].translate(string.maketrans("", ""), string.punctuation)
                tokens = nltk.word_tokenize(sentences[x])
                lemma = nltk.WordNetLemmatizer()
                porter = nltk.PorterStemmer()
                afewwords = [lemma.lemmatize(i) for i in tokens]
                afewwords = [porter.stem(i) for i in afewwords]  # stem the lemmatized tokens
                sentences[x] = ' '.join(afewwords)
                para.append(sentences[x])
                filename = output_path + '\\' + output_file_name + str(selected) + '.txt'
                w = open(filename, 'w')
                w.write(''.join(para))
                w.close()
                para = []
            else:
                # print "paragraph ", x + 1, "is not selected"
                notselect += 1
                # print "cnt - ", cnt
        print "# of paragraphs selected", selected
        print "# of paragraphs not selected", notselect
def __flatten(dic, prefix):
    '''recursively pass through a dict and flatten its "internal" dicts'''
    results = {}
    if dic is not None:
        try:
            for key in dic.keys():
                if type(dic[key]) in [float, int]:
                    results["%s.%s" % (
                        prefix,
                        str(key).translate(string.maketrans(".", "_"))
                    )] = dic[key]
                elif type(dic[key]) in [dict]:  # original read type(dic[key] in [dict]), which always passed
                    results.update(
                        GraphiteUploaderPlugin.__flatten(
                            dic[key],
                            "%s.%s" % (
                                prefix,
                                key.translate(string.maketrans(".", "_"))
                            )
                        )
                    )
        except AttributeError:
            pass
    return results
def get_kmer_densities(path, kmer):
    mC_trans = string.maketrans("C", "E")
    hmC_trans = string.maketrans("C", "O")
    c_density = KmerHdpDistribution(path, kmer)
    mc_density = KmerHdpDistribution(path, kmer.translate(mC_trans))
    hmc_density = KmerHdpDistribution(path, kmer.translate(hmC_trans))
    return c_density, mc_density, hmc_density
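# Illustration of just the kmer relabeling used above (Python 2); E and O
# stand in for 5-methylcytosine and 5-hydroxymethylcytosine respectively.
import string

print "CCAGG".translate(string.maketrans("C", "E"))   # 'EEAGG'
print "CCAGG".translate(string.maketrans("C", "O"))   # 'OOAGG'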
def listShows(path, forBackLog=False):
    if not forBackLog:
        trans = string.maketrans(' ', '.')
    else:
        trans = string.maketrans(' ', '_')
    shows = {}
    try:
        for show_name in os.listdir(path):
            if os.path.isdir(path + show_name):
                sanitized_show_name = show_name.translate(trans, '\'().!').lower()
                shows[sanitized_show_name] = show_name
    except:
        log.error('Unable to find ' + path)
        sys.exit()
    items = config['rss.mapping'].split(',')
    if len(items) > 0:
        for item in items:
            i = item.split('=')
            if len(i) > 1:  # need both sides of the '=' mapping
                local = i[0].strip()
                dist = i[1].strip().translate(trans, '\'().!').lower()
                shows[dist] = local
                log.debug('Extra TV shows mapping : %s => %s' % (dist, local))
    return shows
def build_anagrams():
    '''
    build a python dict of sorted-word and the set of words
    Using brown corpus and a file containing words from multiple corpora
    strip all punctuations, retain hyphens, replace underscores with space
    '''
    punctuations_replace = '#"(){}[]<>.+,/:;=?@_|~-'
    punctuations_remove = '!$\'%&\\*^`'
    replace_table = string.maketrans(punctuations_replace, ' ' * len(punctuations_replace))

    # 1. Brown Corpus
    for word in nltk.corpus.brown.words():
        w = str(word)
        # polish just the way tokens were
        w_list = w.translate(replace_table, punctuations_remove).strip().lower().split()
        for each_w in w_list:
            # add the word to redis with key as a sorted word
            wam[''.join(sorted(each_w))].add(each_w)

    # 2. Wordnet (original referenced nltk.wordnet.wordnet.words())
    for word in nltk.corpus.wordnet.words():
        w = str(word)
        w_list = w.translate(replace_table, punctuations_remove).strip().lower().split()
        for each_w in w_list:
            wam[''.join(sorted(each_w))].add(each_w)

    # 3. Other corpora
    with open(BIG_WL, 'r') as f:
        for line in f:
            w = str(line).strip()
            w_list = w.translate(replace_table, punctuations_remove).strip().lower().split()
            for each_w in w_list:
                wam[''.join(sorted(each_w))].add(each_w)
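# Self-contained sketch of the anagram-map idea above (Python 2); `wam` is
# assumed to be a collections.defaultdict(set) keyed by sorted letters.
from collections import defaultdict

wam = defaultdict(set)
for w in ('listen', 'silent', 'enlist'):
    wam[''.join(sorted(w))].add(w)
print wam['eilnst']   # set(['listen', 'silent', 'enlist']) -- order may vary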
def test_the_reverse_complement(self):
    """Check obj.reverse_complement() method."""
    mapping = ""
    for example1 in self._examples:
        if isinstance(example1, MutableSeq):
            continue
        try:
            comp = example1.reverse_complement()
        except ValueError as e:
            self.assertEqual(str(e), "Proteins do not have complements!")
            continue
        str1 = str(example1)
        # This only does the unambiguous cases
        if any(("U" in str1, "u" in str1, example1.alphabet == generic_rna)):
            mapping = maketrans("ACGUacgu", "UGCAugca")
        elif any(("T" in str1, "t" in str1, example1.alphabet == generic_dna,
                  example1.alphabet == generic_nucleotide)):
            mapping = maketrans("ACGTacgt", "TGCAtgca")
        elif "A" not in str1 and "a" not in str1:
            mapping = maketrans("CGcg", "GCgc")
        else:
            # TODO - look at alphabet?
            continue
        self.assertEqual(str1.translate(mapping)[::-1], str(comp))
        self.assertEqual(comp.alphabet, example1.alphabet)
def buildGoodSet(goodChars=string.printable, badChar='?'):
    """Build a translation table that turns all characters not in goodChars to badChar"""
    allChars = string.maketrans("", "")
    badChars = string.translate(allChars, allChars, goodChars)
    rv = string.maketrans(badChars, badChar * len(badChars))
    return rv
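# Usage sketch for buildGoodSet above (Python 2): any byte outside
# string.printable becomes '?'.
import string

table = buildGoodSet()
print 'caf\xe9'.translate(table)   # 'caf?'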
def html_for_url_node(node):
    if not re.match("javascript:", node["url"]):
        linkURL = sanitize(node["url"])
        # turn punctuation into spaces so the URL and name become keyword lists
        # (the original built two identical tables, ktrtable and trtable)
        punct = "!@#$%^&*()_+-=`~;:'\",<.>/?\\|"
        trtable = string.maketrans(punct, " " * len(punct))
        keysURL = str(linkURL).translate(trtable).lower()
        tags = sanitize(node["name"])  # TODO: check for UTF-8 etc...
        tags = str(tags).translate(trtable).lower()
        allTags = tags + " " + keysURL
        print "# '", sanitize(node["url"]), "'", allTags
        return '<dt><a href="%s">%s</a>\n' % (sanitize(node["url"]), sanitize(node["name"]))
    else:
        return ""
def reverseComplement(seq, rna=False):
    if rna:
        complements = string.maketrans("AUCGN", "UAGCN")
        return convertToRNA(seq).translate(complements)[::-1]
    else:
        complements = string.maketrans("ATCGN", "TAGCN")
        return convertToDNA(seq).translate(complements)[::-1]
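# Self-contained sketch of the same maketrans trick (Python 2); the
# convertToDNA/convertToRNA helpers above are assumed to normalize the input.
import string

complements = string.maketrans("ATCGN", "TAGCN")
print "GATTACA".translate(complements)[::-1]   # 'TGTAATC'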
def hashword(plaintext):
    """
    Munge a plaintext word into something else. Hopefully, the result
    will have some mnemonic value.
    """
    # get a list of random bytes. A byte will be randomly picked from
    # this list when needed.
    rb = getrandomlist()
    # 0.25 chance of case being swapped
    if rb[rb[0]] < 64:
        plaintext = string.swapcase(plaintext)
    # 0.50 chance of vowels being translated one of two ways.
    if rb[rb[2]] > 127:
        plaintext = string.translate(plaintext,
                                     string.maketrans('aeiou AEIOU', '@3!0& 4#10%'))
    else:
        plaintext = string.translate(plaintext,
                                     string.maketrans('aeiou AEIOU', '^#1$~ $3!0&'))
    # 0.4 chance of some additional consonant translation
    if rb[rb[4]] < 102:
        plaintext = string.translate(plaintext,
                                     string.maketrans('cglt CGLT', '(<1+ (<1+'))
    # if word is short, add some digits
    if len(plaintext) < 5:
        plaintext = plaintext + `rb[5]`
    # 0.2 chance of some more digits appended
    if rb[rb[3]] < 51:
        plaintext = plaintext + `rb[205]`
    return plaintext
def url_sign(uri_path, params, client_id, signing_key):
    signing_key = signing_key.translate(string.maketrans('-_', '+/'))
    padding_factor = (4 - len(signing_key) % 4) % 4
    signing_key += "=" * padding_factor
    binary_key = base64.b64decode(unicode(signing_key).translate(dict(zip(map(ord, u'-_'), u'+/'))))

    # construct URI for signing
    uri_path_params = uri_path + '?'
    first = True
    for k in params.keys():
        if not first:
            uri_path_params += '&'
        else:
            first = False
        uri_path_params = "%(base)s%(key)s=%(value)s" % {
            'base': uri_path_params,
            'key': k,
            'value': urllib.quote_plus(str(params[k]))
        }
    uri_path_params += '&client=' + client_id

    # Sign
    digest = hmac.new(binary_key, uri_path_params, hashlib.sha1).digest()
    digest = base64.b64encode(digest)
    digest = digest.translate(string.maketrans('+/', '-_'))
    return "%s&sig=%s" % (uri_path_params, digest.rstrip('='))
def checkData(self, row):
    jobTime = row[4]
    jobTerm = row[5]
    w1 = {'full time': 1, 'part time': -1, '': 0}  # this dictionary corresponds to time feature
    w2 = {'permanent': 0, 'contract': 1, '': -1}   # this dictionary corresponds to term feature
    if jobTime == '' or jobTerm == '':
        s = row[2].lower()
        # map punctuation, digits and stray non-ASCII bytes in the description
        # to spaces (the second maketrans argument must match the first in length)
        junk = "‘’‚“”„†‡‰‹›!“#$%&‘()™*+,-˜./0123456789:;<=>?@[\]_`{|}~–—¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿Þßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ€¢â—ªïž'"
        s = s.translate(string.maketrans(junk, " " * len(junk)))
        if jobTime == '':
            if ('full time' in s and 'part time' in s) or ('full time' not in s and 'part time' not in s):
                word1 = ''
            else:
                if 'full time' in s:  # searching full time in description
                    word1 = 'full time'
                else:
                    word1 = 'part time'
        else:
            word1 = jobTime.translate(string.maketrans("_", " "))  # removing underscore from time feature value
        if jobTerm == '':
            if ('permanent' in s and 'contract' in s) or ('permanent' not in s and 'contract' not in s):
                word2 = ''
            else:
                if 'permanent' in s:  # searching permanent in description
                    word2 = 'permanent'
                else:
                    word2 = 'contract'
        else:
            word2 = jobTerm.translate(string.maketrans("_", " "))  # removing underscore from term feature value
    else:
        word1 = jobTime.translate(string.maketrans("_", " "))
        word2 = jobTerm.translate(string.maketrans("_", " "))
    return [word1, w1[word1], word2, w2[word2]]
def translate(read, ref, match):
    # reverse-complement all three strings; the reference table also maps
    # IUPAC ambiguity codes to their complements
    read = read[::-1]
    read = read.translate(string.maketrans('ACGTacgt', 'TGCAtgca'))
    ref = ref[::-1]
    ref = ref.translate(string.maketrans('ACGTacgtRYKMBVDH', 'TGCAtgcaYRMKVBHD'))
    match = match[::-1]
    return (read, ref, match)
def retrieve_access_token(self):
    output.speak(_("Please wait while an access token is retrieved from Twitter."), True)
    httpd = BaseHTTPServer.HTTPServer(('127.0.0.1', 8080), Handler)
    twitterDataOrig = str(self.config['oauth']['twitterData'])
    trans = maketrans("-_~", "+/=")
    twitterDataTrans = twitterDataOrig.translate(trans)
    twitterData = b64decode(twitterDataTrans)
    twitterData = literal_eval(twitterData)
    tw = Twython(twitterData[0], twitterData[1], auth_endpoint='authorize')
    try:
        auth = tw.get_authentication_tokens("http://127.0.0.1:8080")
    except SSLError:
        output.speak(_("Sorry, we can't connect to Twitter. You may want to adjust your firewall or antivirus software appropriately"), True)
        return  # without tokens there is nothing to open below
    webbrowser.open_new_tab(auth['auth_url'])
    global logged, verifier
    logged = False
    while logged == False:
        httpd.handle_request()
    self.auth_handler = Twython(twitterData[0], twitterData[1], auth['oauth_token'], auth['oauth_token_secret'])
    token = self.auth_handler.get_authorized_tokens(verifier)
    output.speak(_("Retrieved access token from Twitter."), True)
    httpd.server_close()
    data = [token['oauth_token'], token['oauth_token_secret']]
    eData = dumps(data)
    trans = maketrans("+/=", "-_~")
    eData = b64encode(eData)
    eData = eData.translate(trans)
    self.config['oauth']['userData'] = eData
    self.login()
    del (httpd, auth, tw, token, logged, verifier, twitterData, twitterDataOrig, data, eData, self.auth_handler)
def get_encrypt_decrypt_tables():
    # string.maketrans(b'', b'') is the 256-byte identity table; split it
    # into a list of single-byte strings
    orig_table = string.maketrans(b'', b'')
    orig_table = [orig_table[i: i + 1] for i in range(len(orig_table))]
    orig_table_decrypt = string.maketrans(b''.join(orig_table), string.maketrans(b'', b''))
    encrypt_table, decrypt_table = get_table(bytes(PASSWORD))
    return encrypt_table, decrypt_table
def translator(frm='', to='', delete='', keep=None):
    """
    @write by Chris Perkins
    @copy by Qingluan
    A closure factory for common string translations.
    """
    import string
    if len(to) == 1:
        to = to * len(frm)
    trans = string.maketrans(frm, to)
    if keep is not None:
        all_chars = string.maketrans('', '')  # the identity table: every 8-bit char
        delete = all_chars.translate(all_chars, keep.translate(all_chars, delete))

    def translate(s):
        return s.translate(trans, delete)
    return translate
def remove_punctuation(text):
    result = text.translate(string.maketrans("", ""), string.punctuation)
    print(result)
    return result
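# Usage sketch for remove_punctuation above (Python 2): the identity table plus
# a deletion set strips every character in string.punctuation.
import string

remove_punctuation("Hello, world! (testing)")   # prints 'Hello world testing'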
#!/usr/bin/env python
import sys
import string
from sklearn.feature_extraction import stop_words

stops = set(stop_words.ENGLISH_STOP_WORDS)
stops.update('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')  # add digits to stopwords (brute force way)
stops.update(['thou', 'shall', 'thee'])  # notice, update needs a formal "set", or else it will add each single character...

# get all lines from stdin
for line in sys.stdin:
    # remove leading and trailing whitespace and convert to lowercase
    line = line.strip().lower()
    # remove punctuation
    # (I wonder if it would be best done on the full line here, vs. on 'words' after the split)?
    line = line.translate(string.maketrans("", ""), string.punctuation)
    # split the line into words; splits on any whitespace
    words = line.split()
    # output tuples (word, 1) in tab-delimited format, but only if they're not a stopword!
    for word in words:
        if word not in stops:  # omit stopwords
            if len(word) > 3:  # ignore words of three characters or fewer
                print '%s\t%s' % (word, "1")
reg_decl_with_pe = re.compile("<(![^-\[]|\?)")
reg_subst_pe_search = re.compile(">|%")
reg_cond_sect = re.compile("<!\\[|\\]\\]>")
reg_litval_stop = re.compile("%|&#")

# RFC 1766 language codes
reg_lang_code = re.compile("([a-zA-Z][a-zA-Z]|[iIxX]-([a-zA-Z])+)(-[a-zA-Z])*")

# Mapping regexps to error codes
# NB: 3900 is reported directly from _get_name
reg2code = {
    reg_name.pattern: 3900,
    reg_ver.pattern: 3901,
    reg_enc_name.pattern: 3902,
    reg_std_alone.pattern: 3903,
    reg_hex_digits.pattern: 3905,
    reg_digits.pattern: 3906,
    reg_pe_ref.pattern: 3907,
    reg_attr_type.pattern: 3908,
    reg_attr_def.pattern: 3909,
    reg_nmtoken.pattern: 3910,
}

# Some useful variables
predef_ents = {"lt": "<", "gt": ">", "amp": "&", "apos": "'", "quot": '"'}

# Translation tables (maketrans needs equal-length arguments)
ws_trans = string.maketrans("\r\t\n", "   ")  # Whitespace normalization
id_trans = string.maketrans("", "")           # Identity transform
    InvalidResourceSpecification,
)
from .defaults import (
    GET,
    POST,
    DELETE,
    Status,
    DupeOption,
    UuidOption,
    JobDefFormat,
    JOB_RUN_TIMEOUT,
    JOB_RUN_INTERVAL,
)

_JOB_ID_CHARS = ascii_letters + digits
_JOB_ID_TRANS_TAB = maketrans(_JOB_ID_CHARS, '#' * len(_JOB_ID_CHARS))
_JOB_ID_TEMPLATE = '########-####-####-####-############'
_RUNDECK_RESP_FORMATS = ('xml',)  # TODO: yaml and json; ('xml') alone would be a plain string
_EXECUTION_COMPLETED = (Status.FAILED, Status.SUCCEEDED, Status.ABORTED)
_EXECUTION_PENDING = (Status.RUNNING,)


def is_job_id(job_id):
    """Checks if a Job ID "looks" like a UUID. It does not check if it exists as a job
    in Rundeck. And of course, a Rundeck Job ID does not have to be a "UUID". Any
    unique string will do so be prepared for false negatives if you customize your job
    ids.

    :Parameters:
        job_id : str
            a Rundeck Job ID
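# The body of is_job_id is cut off above; presumably it masks alphanumerics
# with '#' and compares against _JOB_ID_TEMPLATE. A hypothetical sketch of
# that check (the UUID value is made up):
from string import ascii_letters, digits, maketrans

chars = ascii_letters + digits
tab = maketrans(chars, '#' * len(chars))
print '3e488577-ed0a-4a80-8fed-7c62fe4a9d1e'.translate(tab) == \
    '########-####-####-####-############'   # True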
def rot(message, shift=3):
    "Employs the Caesar cipher to encode/decode messages."
    alphabet = ascii_lowercase
    shifted_alphabet = alphabet[shift:] + alphabet[:shift]
    table = maketrans(alphabet, shifted_alphabet)
    return message.lower().translate(table)
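# Usage sketch for rot above (Python 2; assumes the snippet's implied
# `from string import ascii_lowercase, maketrans`). Shifting by 26 - shift
# undoes the encoding.
from string import ascii_lowercase, maketrans

print rot('attack at dawn')                  # 'dwwdfn dw gdzq'
print rot(rot('attack at dawn'), shift=23)   # 'attack at dawn'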
def process(lst):
    prccd_item_list = []
    for tweet in lst:
        # print "[original]", tweet
        # Normalizing utf8 formatting
        tweet = tweet.decode("unicode-escape").encode("utf8").decode("utf8")
        # tweet = tweet.encode("utf-8")
        tweet = tweet.encode("ascii", "ignore")
        tweet = tweet.strip(' \t\n\r')

        # 1. Lowercasing
        tweet = tweet.lower()
        # print "[lowercase]", tweet

        # Word-Level
        tweet = re.sub(' +', ' ', tweet)  # replace multiple spaces with a single space

        # 2. Normalizing digits
        tweet_words = tweet.strip('\r').split(' ')
        for word in [word for word in tweet_words if word.isdigit()]:
            tweet = tweet.replace(word, "D" * len(word))
        # print "[digits]", tweet

        # 3. Normalizing URLs
        tweet_words = tweet.strip('\r').split(' ')
        for word in [word for word in tweet_words
                     if '/' in word or '.' in word and len(word) > 3]:
            tweet = tweet.replace(word, "httpAddress")
        # print "[URLs]", tweet

        # 4. Normalizing username
        tweet_words = tweet.strip('\r').split(' ')
        for word in [word for word in tweet_words
                     if word[0] == '@' and len(word) > 1]:
            tweet = tweet.replace(word, "usrId")
        # print "[username]", tweet

        # 5. Removing special Characters
        punc = '@$%^&*()_+-={}[]:"|\'\~`<>/,'
        trans = string.maketrans(punc, ' ' * len(punc))
        tweet = tweet.translate(trans)
        # print "[punc]", tweet

        # 6. Normalizing +2 elongated char
        tweet = re.sub(r"(.)\1\1+", r'\1\1', tweet.decode('utf-8'))
        # print "[elong]", tweet

        # 7. tokenization using tweetNLP
        tweet = ' '.join(twokenize.simpleTokenize(tweet))
        # print "[token]", tweet

        # 8. fix \n char
        tweet = tweet.replace('\n', ' ')

        prccd_item_list.append(tweet.strip())
        # print "[processed]", tweet.replace('\n', ' ')
    return prccd_item_list
def get_transtable():
    transtable = maketrans(ORIGIN_ALPHABET, CUSTOMER_ALPHABET)
    return transtable
        self.typereso = typereso
        self.type_descriptions = []
        self.signature = kwargs.pop('signature', None)
        for td in type_descriptions:
            self.type_descriptions.extend(td)
        for td in self.type_descriptions:
            td.finish_signature(self.nin, self.nout)
        if kwargs:
            raise ValueError('unknown kwargs %r' % str(kwargs))


# String-handling utilities to avoid locale-dependence.

import string
if sys.version_info[0] < 3:
    UPPER_TABLE = string.maketrans(string.ascii_lowercase,
                                   string.ascii_uppercase)
else:
    UPPER_TABLE = bytes.maketrans(bytes(string.ascii_lowercase, "ascii"),
                                  bytes(string.ascii_uppercase, "ascii"))


def english_upper(s):
    """ Apply English case rules to convert ASCII strings to all upper case.

    This is an internal utility function to replace calls to str.upper() such
    that we can avoid changing behavior with changing locales. In particular,
    Turkish has distinct dotted and dotless variants of the Latin letter "I"
    in both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale.

    Parameters
    ----------
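# The body of english_upper is cut off above; given the UPPER_TABLE built for
# each Python version, it is presumably just a locale-independent table
# lookup along these lines (hypothetical sketch):
def _english_upper_sketch(s):
    return s.translate(UPPER_TABLE)

# _english_upper_sketch('short_typename') -> 'SHORT_TYPENAME', even in a
# locale (such as "tr") where str.upper() behaves differently.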
import logging
import hashlib
import hmac
import bcrypt
from time import time
from struct import pack, unpack
from base64 import b64decode
from string import maketrans

usersafe_encoding = maketrans('-$%', 'OIl')


class auth:
    def __init__(self, reqdata):
        self.reqdata = reqdata

    # First try if it is a valid token
    # Failure may just indicate that we were passed a password
    def auth_token(self):
        try:
            token = b64decode(self.password.translate(usersafe_encoding) + '=======')
        except:
            logging.debug('Could not decode token (maybe not a token?)')
            return False

        jid = self.username + '@' + self.domain

        if len(token) != 23:
            logging.debug('Token is too short: %d != 23 (maybe not a token?)' % len(token))
            return False

        (version, mac, header) = unpack('> B 16s 6s', token)
def get_reverse_transtable():
    transtable = maketrans(CUSTOMER_ALPHABET, ORIGIN_ALPHABET)
    return transtable
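# Round-trip sketch for the two transtables above (Python 2). The alphabet
# values are assumptions for illustration; any two equal-length strings of
# distinct characters work the same way.
from string import maketrans

ORIGIN_ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
CUSTOMER_ALPHABET = 'zyxwvutsrqponmlkjihgfedcba'

scrambled = 'secret'.translate(get_transtable())
print scrambled                                      # 'hvxivg'
print scrambled.translate(get_reverse_transtable())  # 'secret'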