Example No. 1
def index_img_kw(img, title, description):
    """
    Parses the title and description and creates a frequency table,
    then stores the frequencies into the Keywords table for the given
    image_id
    """

    frequencies = {}
    title_kws = title.split()
    des_kws = description.split()


    for word in title_kws:
        word = word.lower()
        word = string.translate(word, None, string.punctuation)
        if word not in STOP_WORDS:
            frequencies[word] = frequencies[word] + 2 if word in frequencies else 2
    
    for word in des_kws:
        # normalize before the stop-word check, matching the title loop
        word = word.lower()
        word = string.translate(word, None, string.punctuation)
        if word not in STOP_WORDS:
            frequencies[word] = frequencies[word] + 1 if word in frequencies else 1
    
    # Save in database now for this image
    try:
        for entry, val in frequencies.items():
        
            kw = Keywords()
            kw.keyword = entry.lower()
            kw.frequency = val
            kw.image = img
            kw.save()
    except:
        print sys.exc_info()
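Portability note: string.translate(word, None, string.punctuation) only exists in Python 2. A minimal Python 3 sketch of the same keyword-weighting idea, assuming a stop-word set is supplied as in the example above:

import string

PUNCT_TABLE = str.maketrans('', '', string.punctuation)

def keyword_frequencies(title, description, stop_words):
    # Title words count double relative to description words.
    frequencies = {}
    for weight, text in ((2, title), (1, description)):
        for word in text.split():
            word = word.lower().translate(PUNCT_TABLE)
            if word and word not in stop_words:
                frequencies[word] = frequencies.get(word, 0) + weight
    return frequencies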
Example No. 2
def alleleCount(baseList,refNuc):
    table = string.maketrans('ATCG', 'TAGC')
    pos = defaultdict(int)
    neg = defaultdict(int)
    if refNuc in ['A', 'T']:
        for (base, isReverse) in baseList:
            if isReverse:    # negative strand
                neg[string.translate(base, table)] += 1
            else:    # positive strand
                pos[base] += 1
    elif refNuc == 'C': # only negative strand
        for (base, isReverse) in baseList:
            if isReverse:
                neg[string.translate(base, table)] += 1
    elif refNuc == 'G': # only positive strand
        for (base, isReverse) in baseList:
            if not isReverse:
                pos[base] += 1
    aCount = pos['A'] + neg['A']
    tCount = pos['T'] + neg['T']
    cCount = pos['C'] + neg['C']
    gCount = pos['G'] + neg['G']
    total = aCount + tCount + cCount + gCount
    posCov = sum([pos[base] for base in ['A', 'T', 'C', 'G']])
    negCov = sum([neg[base] for base in ['A', 'T', 'C', 'G']])
    return aCount, tCount, cCount, gCount, total, posCov, negCov
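For reference, the maketrans/translate pair used above ports to Python 3 by moving from the string module functions to the str methods; a minimal sketch of the strand-complement step:

# Python 3: the table lives on str, and translate is a method.
COMPLEMENT = str.maketrans('ATCG', 'TAGC')

print('ACGT'.translate(COMPLEMENT))  # -> 'TGCA'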
Example No. 3
def encrypt_all(password, method, op, data):
    if method is not None and method.lower() == 'table':
        method = None
    if not method:
        [encrypt_table, decrypt_table] = init_table(password)
        if op:
            return string.translate(data, encrypt_table)
        else:
            return string.translate(data, decrypt_table)
    else:
        import M2Crypto.EVP
        result = []
        method = method.lower()
        (key_len, iv_len) = method_supported[method]
        (key, _) = EVP_BytesToKey(password, key_len, iv_len)
        if op:
            iv = random_string(iv_len)
            result.append(iv)
        else:
            iv = data[:iv_len]
            data = data[iv_len:]
        if method == 'salsa20-ctr':
            cipher = encrypt_salsa20.Salsa20Cipher(method, key, iv, op)
        elif method == 'rc4-md5':
            cipher = encrypt_rc4_md5.create_cipher(method, key, iv, op)
        else:
            cipher = M2Crypto.EVP.Cipher(method.replace('-', '_'), key, iv,
                                         op, key_as_bytes=0, d='md5',
                                         salt=None, i=1, padding=1)
        result.append(cipher.update(data))
        return ''.join(result)
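The table branch above is a password-derived byte-substitution cipher: init_table builds a 256-byte permutation from the password and string.translate applies it byte-for-byte. A minimal Python 3 sketch of the same idea (the key schedule below is purely illustrative, not shadowsocks' actual derivation):

import hashlib

def make_tables(password):
    # Illustrative only: derive a 256-byte permutation by sorting byte
    # values with a password-derived key (not the real key schedule).
    seed = hashlib.md5(password).digest()
    encrypt = bytes(sorted(range(256),
                           key=lambda b: hashlib.md5(seed + bytes([b])).digest()))
    decrypt = bytearray(256)
    for plain, enc in enumerate(encrypt):
        decrypt[enc] = plain
    return encrypt, bytes(decrypt)

def apply_table(data, table):
    # bytes.translate with a 256-byte table substitutes byte-for-byte.
    return data.translate(table)

enc, dec = make_tables(b'password')
assert apply_table(apply_table(b'secret', enc), dec) == b'secret'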
Example No. 4
 def __find(self):
     self.status(1, "searching")
     self.__nfound = 0
     for i, filename in enumerate(self.get_files()):
         if i % 16 == 0:
             self.status(self.__nfound)
         try:
             f = open(filename, 'r')
         except IOError:
             continue
         for linenumber, line in enumerate(f):
             if not self.__running:
                 self.__finished()
             if BINARY_RE.match(line):
                 break
             line = string.translate(line, all_chars, hi_bit_chars)
             line = string.translate(line, hi_lo_table)
             matches = self.__pattern.findall(line)
             if len(matches):
                 self.__nfound = self.__nfound + len(matches)
                 if self.__nfound >= self.__options.maxresults:
                     self.__finished()
                 result = GrepResult(linenumber, filename, line, matches)
                 gtk.threads_enter()
                 self.emit('found', result)
                 gtk.threads_leave()
         f.close()
     self.__finished()
Example No. 5
def make_langs(country_name):
    """Generates all four answers for the primary language quiz question, 
    including the right answer. Returns as a set."""
    langs = set()

    #Make country objects for wrong answers from same continent
    country_obj = Country.query.filter(Country.country_name == country_name).first()
    right_langs = country_obj.languages
    right_langs = str(right_langs)
    right_langs = translate(right_langs, None, '{"}')
    langs.add(right_langs)
    if country_obj.continent_name == "Caribbean":
        langs.add("English, Spanish")

    continent = country_obj.continent_name
    nearby_countries = Country.query.filter(Country.continent_name == continent, Country.country_name != country_name).all()
    top_index = len(nearby_countries) - 1
    print top_index

    while len(langs) < 4:
        index = randint(0, top_index)
        wrong_lang = (nearby_countries[index]).languages
        wrong_lang = str(wrong_lang)
        wrong_lang = translate(wrong_lang, None, '{"}')
        langs.add(wrong_lang)
        print langs
        print len(langs)


    return langs
Example No. 6
def main():
    f=string.ascii_lowercase
    t=f[2:]+f[:2]
    trans_tables=string.maketrans(f,t)
    encs="g fmnc wms bgblr rpylqjyrc gr zw fylb. rfyrq ufyr amknsrcpq ypc dmp. bmgle gr gl zw fylb gq glcddgagclr ylb rfyr'q ufw rfgq rcvr gq qm jmle. sqgle qrpgle.kyicrpylq() gq pcamkkclbcb. lmu ynnjw ml rfc spj."
    print(string.translate(encs,trans_tables))
    print(string.translate('map',trans_tables))
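The same shift-by-two decode in Python 3, where both maketrans and translate are str methods (the ciphertext is the one from the example above):

import string

f = string.ascii_lowercase
trans_table = str.maketrans(f, f[2:] + f[:2])
encs = ("g fmnc wms bgblr rpylqjyrc gr zw fylb. rfyrq ufyr amknsrcpq ypc dmp. "
        "bmgle gr gl zw fylb gq glcddgagclr ylb rfyr'q ufw rfgq rcvr gq qm jmle. "
        "sqgle qrpgle.kyicrpylq() gq pcamkkclbcb. lmu ynnjw ml rfc spj.")
print(encs.translate(trans_table))
print('map'.translate(trans_table))  # -> 'ocr'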
Example No. 7
def encrypt_all(password, method, op, data):
    if method is not None and method.lower() == "table":
        method = None
    if not method:
        [encrypt_table, decrypt_table] = init_table(password)
        if op:
            return string.translate(data, encrypt_table)
        else:
            return string.translate(data, decrypt_table)
    else:
        import M2Crypto.EVP

        result = []
        method = method.lower()
        (key_len, iv_len) = method_supported[method]
        (key, _) = EVP_BytesToKey(password, key_len, iv_len)
        if op:
            iv = random_string(iv_len)
            result.append(iv)
        else:
            iv = data[:iv_len]
            data = data[iv_len:]
        cipher = M2Crypto.EVP.Cipher(
            method.replace("-", "_"), key, iv, op, key_as_bytes=0, d="md5", salt=None, i=1, padding=1
        )
        result.append(cipher.update(data))
        f = cipher.final()
        if f:
            result.append(f)
        return "".join(result)
Example No. 8
def hashword(plaintext):
	"""
	Munge a plaintext word into something else. Hopefully, the result
	will have some mnemonic value.
	"""
	# get a list of random bytes. A byte will be randomly picked from
	# this list when needed.
	rb = getrandomlist()
	# 0.25 chance of case being swapped
	if rb[rb[0]] < 64:
		plaintext = string.swapcase(plaintext)
	# 0.50 chance of vowels being translated one of two ways.
	if rb[rb[2]] > 127:
		plaintext = string.translate(plaintext, 
			string.maketrans('aeiou AEIOU', '@3!0& 4#10%'))
	else:
		plaintext = string.translate(plaintext, 
			string.maketrans('aeiou AEIOU', '^#1$~ $3!0&'))
	# 0.4 chance of some additional consonant translation
	if rb[rb[4]] < 102:
		plaintext = string.translate(plaintext, 
			string.maketrans('cglt CGLT', '(<1+ (<1+'))
	# if word is short, add some digits
	if len(plaintext) < 5:
		plaintext = plaintext + `rb[5]`
	# 0.2 chance of some more digits appended
	if rb[rb[3]] < 51:
		plaintext = plaintext + `rb[205]`
	return plaintext
Example No. 9
def seq2(ch,start,end):
    trans = string.maketrans('ATCGatcg','TAGCtagc')
    if ch[0:2] == 'ch':
        inFile = open('/netshare1/home1/people/hansun/Data/GenomeSeq/Human/ucsc.hg19.fasta.fa')
        while True:
            line1 = inFile.readline().strip()
            line2 = inFile.readline().strip()
            if line1:
                if line1 == '>'+ch:
                    if start <= end:
                        seq = line2[start-1:end].upper()
                    else:
                        seq = string.translate(line2[end-1:start][::-1],trans).upper()
                    return seq
            else:
                break
        inFile.close()
    elif ch[0:2] == 'NC':
        inFile = open('/netshare1/home1/people/hansun/Data/VirusesGenome/VirusesGenome.fasta.fa')
        while True:
            line1 = inFile.readline().strip()
            line2 = inFile.readline().strip()
            if line1:
                if line1.find('>'+ch) == 0:
                    if start <= end:
                        seq = line2[start-1:end].upper()
                    else:
                        seq = string.translate(line2[end-1:start][::-1],trans).upper()
                    return seq
            else:
                break
        inFile.close()
Example No. 10
def decoder3():
    table = string.maketrans(
        'abcdefghijklmnopqrstuvwxyz',
        'cdefghijklmnopqrstuvwxyzab'
    )
    print string.translate(codedmessage, table)
    print string.translate('map', table)
Example No. 11
def word_histogram(source):
    """Create histogram of normalized words (no punct or digits)
	scale that in terms of percentage"""
    hist = {}
    trans = maketrans('','')
    if type(source) in (StringType,UnicodeType):  # String-like src
        for word in split(source):
            word = translate(word, trans, punctuation+digits)
            word=word.lower()
            if len(word) > 0:
                hist[word] = hist.get(word,0) + 1
    elif hasattr(source,'read'):                  # File-like src
        try:
            from xreadlines import xreadlines     # Check for module
            for line in xreadlines(source):
                for word in split(line):
                    word = translate(word, trans, punctuation+digits)
                    word=word.lower()
                    if len(word) > 0:
                        hist[word] = hist.get(word,0) + 1
        except ImportError:                       # Older Python ver
            line = source.readline()          # Slow but mem-friendly
            while line:
                for word in split(line):
                    word = translate(word, trans, punctuation+digits)
                    word=word.lower()
                    if len(word) > 0:
                        hist[word] = hist.get(word,0) + 1
                line = source.readline()
    else:
        raise TypeError, \
              "source must be a string-like or file-like object"
    return hist
Example No. 12
def scrape_links_and_wordlistify(links, lower=False, verbose=1):
    import nltk
    import requests
    import string
    raw = ''
    wordlist = {}
    for site in links:
        try:
            if verbose == 1:
                print '[+] fetching data from: ', site
            if site.find('http://pastebin.com/') == 0:
                raw = requests.get(site.replace('http://pastebin.com/', 'http://pastebin.com/raw.php?i=')).content
            else:
                raw = requests.get(site).content
            if lower == False:
                l = string.translate(nltk.clean_html(raw), string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
            else:
                l = string.lower(nltk.clean_html(raw))
                l = string.translate(l, string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
        except:
            if verbose == 1:
                print '[-] Skipping url: ', site
    return wordlist
Example No. 13
 def encripta_maketrans(self, texto):
     # Alphabet character set mapped according to the ROT13 scheme
     alfabeto = string.maketrans("ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz", 
         "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm")
     # translate performs the conversion: it takes the text to convert
     # and the translation table, which in this case implements ROT13
     print string.translate(texto, alfabeto)      
Example No. 14
 def make_code(self):
     symbols = (string.ascii_uppercase + string.digits)
     symbols = string.translate(symbols, None, 'OI')  # translate returns a new string; drop the ambiguous O/I
     random.seed()
     code = ''
     for i in range(10):
         code += random.choice(symbols)
     self.code = code
Example No. 15
def decoder4():
    table = string.maketrans(
        string.ascii_lowercase,
        string.ascii_lowercase[2:] + string.ascii_lowercase[:2]
    )
    print string.translate(codedmessage, table)
    print codedmessage.translate(table)
    print string.translate('map', table)
Example No. 16
def slp_cmp(a,b):
 try:
  a1 = string.translate(a,trantable)
 except:
  print "sansort.san_cmp. Problem with translate. a=",a.encode('utf-8')
  exit(1)
 b1 = string.translate(b,trantable)
 return cmp(a1,b1)
Example No. 17
def string_module():
    import string
    s="ab cd ef"
    print string.capwords(s);
    leet=string.maketrans('ace', '123');
    print string.translate(s, leet,'fbd');
    value={'var':'foo'}
    t=string.Template('')
Example No. 18
def MakeIDXMLConform(id):
	'''
	Make the name/id COLLADA XML/XSD conform.
	See StripString and translateMap docu for more information.
	'''
	if (len(id) > 0 and id[0] == '#'):
		return '#' + string.translate(id[1:], translateMap)
	else:
		return string.translate(id, translateMap)
Example No. 19
def main2():
    # for improvement
    import string
    offset = 2
    table = string.maketrans(string.ascii_lowercase, 
            string.ascii_lowercase[offset:] + string.ascii_lowercase[:offset])
    src = "g fmnc wms bgblr rpylqjyrc gr zw fylb. rfyrq ufyr amknsrcpq ypc dmp. bmgle gr gl zw fylb gq glcddgagclr ylb rfyr'q ufw rfgq rcvr gq qm jmle. sqgle qrpgle.kyicrpylq() gq pcamkkclbcb. lmu ynnjw ml rfc spj."
    print string.translate(src, table)
    print string.translate("map", table)
Example No. 20
def slp_cmp(a,b):
 try:
  a1 = string.translate(a,trantable)
 except UnicodeDecodeError as err:
  print "sansort.san_cmp. Problem with translate. a='%s'"%a.encode('utf-8')
  print err
  exit(1)
 b1 = string.translate(b,trantable)
 return cmp(a1,b1)
Example No. 21
def normalize(ccNumString):
    """Remove all of the non-numbers from the given strings.

    >>> normalize('a-b c235x85')
    '23585'
    """
    allChars=string.maketrans("", "")
    badchars=string.translate(allChars, allChars, string.digits)
    return string.translate(ccNumString, allChars, badchars)
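The double-translate trick above (use an identity table to find every non-digit, then delete them) collapses in Python 3, where str.maketrans takes a third argument of characters to delete; a sketch covering the same 8-bit range as the Python 2 table:

import string

# Every character in the 0-255 range that is not an ASCII digit.
NON_DIGITS = ''.join(chr(i) for i in range(256) if chr(i) not in string.digits)
DIGITS_ONLY = str.maketrans('', '', NON_DIGITS)

def normalize(cc_num_string):
    """Remove all of the non-numbers from the given string.

    >>> normalize('a-b c235x85')
    '23585'
    """
    return cc_num_string.translate(DIGITS_ONLY)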
Example No. 22
def trans_map(s='map'):
    import string
    tb = string.maketrans(text, solutions2())
    # or
    # use string.lowercase
    frm = string.lowercase       # a-z
    to = string.lowercase[2:] + string.lowercase[:2]  # c-zab
    tb = string.maketrans(frm, to)
    print string.translate('map', tb)  # map -> ocr
Example No. 23
def MakeIDXMLConform(id):
    """
	Make the name/id COLLADA XML/XSD conform.
	See StripString and translateMap docu for more information.
	"""
    if len(id) > 0 and id[0] == "#":
        return "#" + string.translate(id[1:], translateMap)
    else:
        return string.translate(id, translateMap)
Example No. 24
def encode() :
    offset = 10
    ceaser = alphabet[offset:] + alphabet[0:offset]

    trans = maketrans(alphabet, ceaser)
                             
    phrase = raw_input("Enter your plaintext phrase: ")

    print translate(phrase, trans)
Example No. 25
def twodigit (num):
    word = str(num)
    if (word[0] != '1'):
        new = int(string.translate(word[0],string.maketrans('023456789','066555766')))
        new = new + onedigit(word[1])
        return new
    else:
        new = int(string.translate(word[1],string.maketrans('0123456789','3668877988')))
        return new
Example No. 26
def fixname(s, removeDots = True):
	ss = s.encode('iso8859_2', 'ignore')		
	if removeDots :		
		x = string.translate(ss, nametrans, '?')
		while x and len(x) and (x[-1] == '.' or x[-1] == ' ') :
			x = x[:-1]
	else :
		x = string.translate(ss, nametrans)
	return x
Example No. 27
def translate(seq,start,end,FROM,TO):
    # start,end:count from 1.
    six = []
    try:
        trans = string.maketrans('atcgATCG','tagcTAGC')
        if seq[start-1:end].upper()== FROM:
            seq_to = seq[0:start-1]+TO+seq[end:]
            seq_rev = string.translate(seq[::-1],trans)
            seq_to_rev = string.translate(seq_to[::-1],trans)
    
            for i in range(3):
                pep = []
                for j in range(i,len(seq),3):
                    c = seq[j:j+3].upper()
                    if len(c) == 3:
                        pep.append(Codon[c])
                s = start - i
                e = end - i 
                six.append([''.join(pep),int(ceil(s/3.0)),int(ceil(e/3.0))])
            for i in range(3):
                pep = []
                for j in range(i,len(seq_rev),3):
                    c = seq_rev[j:j+3].upper()
                    if len(c) == 3:
                        pep.append(Codon[c])
                s = len(seq_rev) - end + 1 - i
                e = len(seq_rev) - start + 1 - i
                six.append([''.join(pep),int(ceil(s/3.0)),int(ceil(e/3.0))])
    
            end = start + len(TO) - 1
            for i in range(3):
                pep = []
                for j in range(i,len(seq_to),3):
                    c = seq_to[j:j+3].upper()
                    if len(c) == 3:
                        pep.append(Codon[c])
                s = start - i
                e = end - i 
                six.append([''.join(pep),int(ceil(s/3.0)),int(ceil(e/3.0))])
            for i in range(3):
                pep = []
                for j in range(i,len(seq_to_rev),3):
                    c = seq_to_rev[j:j+3].upper()
                    if len(c) == 3:
                        pep.append(Codon[c])
                s = len(seq_to_rev) - end + 1 - i
                e = len(seq_to_rev) - start + 1 - i
                six.append([''.join(pep),int(ceil(s/3.0)),int(ceil(e/3.0))])
            return six
    
        else:
            #print('warning:'+'\t'+seq+'\t'+str(start)+'\t'+str(end)+'\t'+FROM+'\t'+TO)
            return six
    except:
            #print('warning2:'+'\t'+seq+'\t'+str(start)+'\t'+str(end)+'\t'+FROM+'\t'+TO)
            return six
Example No. 28
def brute_force() :
    code = raw_input("Enter your code text: ")

    for offset in range (26) :

        ceaser = alphabet[offset:] + alphabet[0:offset]

        trans = maketrans(alphabet, ceaser)
                             
        print translate(code, trans)
Example No. 29
def textfilter(bytestring):
	import string,re

	norm = string.maketrans('', '') #builds list of all characters
	non_alnum = string.translate(norm, norm, string.letters+string.digits) 
	
	trans_nontext=string.maketrans(non_alnum,'?'*len(non_alnum))
	cleaned=string.translate(bytestring, trans_nontext)
	
	return cleaned
Example No. 30
def rot13_more_elegant(x):
    '''This performs rot13 encryption using the string module and maketrans
       Arguments: a string'''

    import string

    # Based on the size of this translation table we produce a pep8/lint error
    rot13 = string.maketrans("ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz",
    "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm")
    print string.translate(x, rot13)
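In Python 3 the explicit 52-character table is unnecessary: ROT13 ships as a stdlib codec (str.maketrans would also still work). A minimal sketch:

import codecs

def rot13_py3(text):
    # 'rot_13' is a text-to-text codec, so it goes through codecs.encode.
    return codecs.encode(text, 'rot_13')

print(rot13_py3('Hello'))  # -> 'Uryyb'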
Example No. 31
 def __call__(self, text):
     if self.mapping:
         return string.translate(text, self.mapping)
     else:
         return text
Example No. 32
import string

x = string.ascii_letters
y = x[1:] + x[0]
tabela = string.maketrans(x, y)

mensagem = "o cruzeiro nao ira cair para serie b"

print string.translate(mensagem, tabela)
Example No. 33
    def output_toc(self, files, fp=sys.stdout):
        if self.use_xml:
            fp.write('<?xml version="1.0" standalone="no"?>\n')
            fp.write(
                '<!DOCTYPE reference PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"\n'
            )
            fp.write(
                '    "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">\n'
            )
            #for filename, obj_def in files:
            #    fp.write('  <!ENTITY ' + string.translate(obj_def.c_name,
            #                                              self.__transtable) +
            #             ' SYSTEM "' + filename + '" >\n')
            #fp.write(']>\n\n')

            #fp.write('<reference id="class-reference">\n')
            #fp.write('  <title>Class Documentation</title>\n')
            #for filename, obj_def in files:
            #    fp.write('&' + string.translate(obj_def.c_name,
            #                                    self.__transtable) + ';\n')
            #fp.write('</reference>\n')

            fp.write(
                '<reference id="class-reference" xmlns:xi="http://www.w3.org/2001/XInclude">\n'
            )
            fp.write('  <title>Class Reference</title>\n')
            for filename, obj_def in files:
                fp.write('  <xi:include href="%s"/>\n' % filename)
            fp.write('</reference>\n')
        else:
            fp.write(
                '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V4.1.2//EN" [\n'
            )
            for filename, obj_def in files:
                fp.write('  <!ENTITY ' +
                         string.translate(obj_def.c_name, self.__transtable) +
                         ' SYSTEM "' + filename + '" >\n')
            fp.write(']>\n\n')

            fp.write('<book id="index">\n\n')
            fp.write('  <bookinfo>\n')
            fp.write('    <title>PyGTK Docs</title>\n')
            fp.write('    <authorgroup>\n')
            fp.write('      <author>\n')
            fp.write('        <firstname>James</firstname>\n')
            fp.write('        <surname>Henstridge</surname>\n')
            fp.write('      </author>\n')
            fp.write('    </authorgroup>\n')
            fp.write('  </bookinfo>\n\n')

            fp.write('  <chapter id="class-hierarchy">\n')
            fp.write('    <title>Class Hierarchy</title>\n')
            fp.write('    <para>Not done yet</para>\n')
            fp.write('  </chapter>\n\n')

            fp.write('  <reference id="class-reference">\n')
            fp.write('    <title>Class Documentation</title>\n')
            for filename, obj_def in files:
                fp.write('&' +
                         string.translate(obj_def.c_name, self.__transtable) +
                         ';\n')

            fp.write('  </reference>\n')
            fp.write('</book>\n')
Example No. 34
 def ascii_lower(string):
     """Lower-case, but only in the ASCII range."""
     return string.translate(
         utab if isinstance(string, unicode_type) else tab)
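A Python 3 sketch of the same idea, assuming the tab/utab tables above map only the ASCII uppercase range (the point of an ASCII-only fold is that a full str.lower() would also fold non-ASCII letters):

import string

ASCII_LOWER = str.maketrans(string.ascii_uppercase, string.ascii_lowercase)

def ascii_lower(s):
    """Lower-case, but only in the ASCII range."""
    return s.translate(ASCII_LOWER)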
Example No. 35
D = {}
inFile = open(
    '/netshare1/home1/people/hansun/Data/GenomeSeq/Human/ucsc.hg19.fasta.fa')
while True:
    line1 = inFile.readline().strip()
    line2 = inFile.readline().strip()
    if line1:
        D[line1[1:]] = line2
    else:
        break
inFile.close()

inFile = open('HeLa-Peptide-Validation-gene-dist')
ouFile = open('HeLa-Peptide-Validation-gene-dist-seq', 'w')

for line in inFile:
    line = line.strip()
    fields = line.split()
    fds = fields[0].split(':')
    ch = fds[1]
    start = int(fds[2])
    end = int(fds[3])
    seq = D[ch][start - 1:end]
    seq_rev = seq[::-1]
    seq_comp = string.translate(seq_rev, trans)
    ouFile.write(line + '\n')
    ouFile.write(seq + '\n')
    ouFile.write(seq_comp + '\n')
inFile.close()
ouFile.close()
Example No. 36
def create_table(datafile, tblname, namefmt='0'):
    ##if len(sys.argv)<2:
    ##	print "\nUsage: csv2tbl.py path/datafile.csv (0,1,2,3 = column name format):"
    ##	print "\nFormat: 0 = TitleCasedWords"
    ##	print "        1 = Titlecased_Words_Underscored"
    ##	print "        2 = lowercase_words_underscored"
    ##	print "        3 = Words_underscored_only (leave case as in source)"
    ##	sys.exit()
    ##else:
    ##	if len(sys.argv)==2:
    ##		dummy, datafile, = sys.argv
    ##		namefmt = '0'
    ##	else: dummy, datafile, namefmt = sys.argv

    namefmt = int(namefmt)
    #outfile = os.path.basename(datafile)
    ##        tblname = os.path.basename(datafile).split('.')[0]
    outfile = os.path.dirname(datafile) + '\\' + tblname + '.sql'

    # Create string translation tables
    allowed = ' _01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    delchars = ''
    for i in range(255):
        if chr(i) not in allowed: delchars = delchars + chr(i)
    deltable = string.maketrans(' ', '_')

    # Create list of column [names],[widths]
    reader = csv.reader(file(datafile), dialect='excel')
    row = reader.next()
    nc = len(row)
    cols = []
    for col in row:
        # Format column name to remove unwanted chars
        col = string.strip(col)
        col = string.translate(col, deltable, delchars)
        fmtcol = col
        if namefmt < 3:
            # Title case individual words, leaving original upper chars in place
            fmtcol = ''
            for i in range(len(col)):
                if col.title()[i].isupper(): fmtcol = fmtcol + col[i].upper()
                else: fmtcol = fmtcol + col[i]
        if namefmt == 2: fmtcol = col.lower()
        if namefmt == 0:
            fmtcol = string.translate(fmtcol, deltable,
                                      '_')  # Remove underscores

        d = 0
        dupcol = fmtcol
        while dupcol in cols:
            d = d + 1
            dupcol = fmtcol + '_' + str(d)
        cols.append([dupcol, 1])

    # Determine max width of each column in each row
    rc = 0
    for row in reader:
        rc = rc + 1
        if len(row) == nc:
            for i in range(len(row)):
                fld = string.strip(row[i])
                if len(fld) > cols[i][1]:
                    cols[i][1] = len(fld)
        else:
            print 'Warning: Line %s ignored. Different width than header' % (
                rc)

    print

    sql = 'CREATE TABLE %s\n(' % (tblname)
    ##        types = get_types(open(datafile))
    for ind, col in enumerate(cols):
        sql = sql + ('\n\t%s VARCHAR(%s),' % (col[0], col[1]))
    print sql[:len(sql) - 1] + ');'
Example No. 37
def translate_longopt(opt):
    """Convert a long option name to a valid Python identifier by
    changing "-" to "_".
    """
    return string.translate(opt, longopt_xlate)
Example No. 38
def rm_punctuation(s):
    return string.translate(s.encode("utf-8"), None, nonletter).strip()
Example No. 39
def irc_lower(s):
    return string.translate(s, _ircstring_translation)
Example No. 40
#!/usr/bin/env python

import string
"""
http://butter:fly@www.pythonchallenge.com/pc/hex/bonus.html
"""

hint = "va gur snpr bs jung?"

shift = 13
lc = string.ascii_lowercase
trans = string.maketrans(lc, lc[shift:] + lc[:shift])
print string.translate(hint, trans)

# in the face of what?

# import this -> In the face of ambiguity
print
import this
print

print 'http://*****:*****@www.pythonchallenge.com/pc/hex/ambiguity.html'
Example No. 41
 def __check_vid(self, video_link):
     if re.match('uggc', video_link):
         video_link = string.translate(video_link, rot13)
         video_link = video_link[:-7] + video_link[-4:]
     return video_link
Example No. 42
def cp850_to_iso8859(data):
    return string.translate(data, cp850_iso_tbl)
Example No. 43
          "so it is okay if you want to just give up"))

for target, origin in seeds:
    for i in range(len(origin)):
        dict[target[i]] = origin[i]

for letter in string.ascii_lowercase:
    if letter not in dict.keys():
        key = letter
        break

for letter in string.ascii_lowercase:
    if letter not in dict.values():
        value = letter
        break

dict[key] = value

trans_table = string.maketrans("".join(dict.keys()), "".join(dict.values()))

fin = open("a-input.txt", "rb")
count = int(fin.readline())
fout = open("a-out.txt", "wb")

for i in range(1, count + 1):
    line = fin.readline()
    fout.write(("Case #%d: " % i) + string.translate(line, trans_table))

fin.close()
fout.close()
Example No. 44
if clearup:
    exit(0)

# Use a fresh one for this mapping
newLoopDev = os.popen("losetup -f").readlines()[0].strip()

# Get pass
try:
    table = string.maketrans(
        'nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM',
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    host = 'localhost'
    port = 50000
    size = 1024
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((host, port))
    data = s.recv(size)
    s.close()
    passwd = string.translate(data, table)
except:
    print "Gates are closed..."
    exit(1)

newCryptDev = ""
for i in xrange(0, 38):
    newCryptDev += random.choice("thequickbrownfoxjumpsoverthelazydog")
mysystem("losetup " + newLoopDev + " " + sys.argv[1])
mysystem("echo " + passwd + "| cryptsetup -c aes-plain -h sha512 create " +
         newCryptDev + " " + newLoopDev)
mysystem("mount /dev/mapper/" + newCryptDev + " " + sys.argv[2])
Example No. 45
    Morbi eu leo a dolor aliquet dictum. Suspendisse condimentum mauris non ipsum rhoncus, sit amet hendrerit augue
    gravida. Quisque facilisis pharetra felis faucibus gravida. In arcu neque, gravida ut fermentum ut, placerat eu
    quam. Nullam aliquet lectus mauris, quis dignissim est mollis sed. Ut vestibulum laoreet eros quis cursus. Proin
    commodo eros in mollis mollis. Mauris bibendum cursus nibh, sit amet eleifend mauris luctus vitae. Sed aliquet
    pretium tristique. Morbi ultricies augue a lacinia porta. Nullam mollis erat non imperdiet imperdiet. Etiam
    tincidunt fringilla ligula, in adipiscing libero viverra eu. Nunc gravida hendrerit massa, in pellentesque nunc
    dictum id.
    """

if PY3:
    split_words = lambda f: list(
        set(f.lower().translate(str.maketrans("", "", punctuation)).split()))
else:
    split_words = lambda f: list(
        set(
            translate(f.lower(), maketrans(punctuation, ' ' * len(punctuation))
                      ).split()))

split_sentences = lambda f: f.split('?')
change_date = lambda: bool(random.randint(0, 1))

WORDS = split_words(FACTORY)
SENTENCES = split_sentences(FACTORY)
NUM_ITEMS = 50


def fix_image(image):
    """
    Fixes the image path.

    :param string image: Image path.
    :return string: Fixed image path.
Example No. 46
 def generate_rrd_name_from(cls, string):
     return string.translate(cls.translationTable)
Example No. 47
def make_string_permuter(reference_string, permuted_string):
    return lambda s: string.translate(
        s, string.maketrans(reference_string, permuted_string))
Example No. 48
import string
s = "g fmnc wms bgblr rpylqjyrc gr zw fylb. rfyrq ufyr amknsrcpq ypc dmp. bmgle gr gl zw fylb gq glcddgagclr ylb rfyr'q ufw rfgq rcvr gq qm jmle. sqgle qrpgle.kyicrpylq() gq pcamkkclbcb. lmu ynnjw ml rfc spj."
t = string.maketrans(string.ascii_lowercase,string.ascii_lowercase[2:]+string.ascii_lowercase[:2])
print string.translate(s, t)
Example No. 49
    def score(self, input_data, offset=0):
        """Returns a score of the profile against input_data (Profile or Seq).

        seq: Profile or Sequence object (or string)
        offset: starting index for searching in seq/profile

        Returns the score of the profile against all possible subsequences/
        subprofiles of the input_data. 
    
        This method determines how well a profile fits at different places
        in the sequence. This is very useful when the profile is a motif and 
        you want to find the position in the sequence that best matches the
        profile/motif. 

        Sequence Example:
        =================
            T   C   A   G
        0   .2  .4  .4  0
        1   .1  0   .9  0
        2   .1  .2  .3  .4

        Sequence: TCAAGT

        pos 0: TCA -> 0.5
        pos 1: CAA -> 1.6
        pos 2: AAG -> 1.7
        pos 3: AGT -> 0.5

        So the subsequence starting at index 2 in the sequence has the 
        best match with the motif

        Profile Example:
        ================
        Profile: same as above
        Profile to score:
            T   C   A   G
        0   1   0   0   0
        1   0   1   0   0   
        2   0   0   .5  .5
        3   0   0   0   1   
        4   .25 .25 .25 .25
       
        pos 0: rows 0,1,2 -> 0.55
        pos 1: rows 1,2,3 -> 1.25
        pos 2: rows 2,3,4 -> 0.45
        """

        #set up some local variables
        data = self.Data
        pl = len(data)  #profile length
        is_profile = False

        #raise error if profile is empty
        if not data.any():
            raise ProfileError, "Can't score an empty profile"

        #figure out what the input_data type is
        if isinstance(input_data, Profile):
            is_profile = True
            to_score_length = len(input_data.Data)
            #raise error if CharOrders don't match
            if self.CharOrder != input_data.CharOrder:
                raise ProfileError, "Profiles must have same character order"
        else:  #assumes it get a sequence
            to_score_length = len(input_data)

        #Profile should fit at least once in the sequence/profile_to_score
        if to_score_length < pl:
            raise ProfileError,\
            "Sequence or Profile to score should be at least %s "%(pl)+\
            "characters long, but is %s."%(to_score_length)
        #offset should be valid
        if not offset <= (to_score_length - pl):
            raise ProfileError, "Offset must be <= %s, but is %s"\
            %((to_score_length-pl), offset)

        #call the apropriate scoring function
        if is_profile:
            return self._score_profile(input_data, offset)
        else:
            #translate seq to indices
            if hasattr(self, '_translation_table'):
                seq_indices = array(map(ord,translate(str(input_data),\
                    self._translation_table)))
            else:  #need to figure out where each item is in the charorder
                idx = self.CharOrder.index
                seq_indices = array(map(idx, input_data))
            #raise error if some sequence characters are not in the CharOrder
            if (seq_indices > len(self.CharOrder)).any():
                raise ProfileError,\
                "Sequence contains characters that are not in the "+\
                "CharOrder"
            #now the profile is scored against the list of indices
            return self._score_indices(seq_indices, offset)
Example No. 50
def main( argv = None ):
    if argv == None: argv = sys.argv

    parser = E.OptionParser( version = "%prog version: $Id: fasta2fasta.py 2782 2009-09-10 11:40:29Z andreas $", 
                             usage = globals()["__doc__"])

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=("translate", 
                               "translate-to-stop",
                               "truncate-at-stop",
                               "back-translate",
                               "mark-codons",
                               "apply-map", 
                               "build-map",
                               "pseudo-codons", 
                               "interleaved-codons",
                               "map-codons",
                               "remove-gaps", 
                               "mask-seg", 
                               "mask-bias", 
                               "mask-codons", 
                               "mask-incomplete-codons",
                               "mask-stops", 
                               "mask-soft", 
                               "remove-stops",
                               "upper", 
                               "lower",
                               "reverse-complement",
                               "sample",
                               "shuffle"),
                      help="method to apply to sequences."  )
    
    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one [default = %default]."  )

    parser.add_option("-x", "--ignore-errors", dest="ignore_errors", action="store_true",
                      help="ignore errors [default = %default]." )

    parser.add_option("-e", "--exclude", dest="exclude", type="string",
                      help="exclude sequences with ids matching pattern [default = %default]." )

    parser.add_option( "--sample-proportion", dest="sample_proportion", type="float",
                      help="sample proportion [default = %default]." )

    parser.add_option("-n", "--include", dest="include", type="string",
                      help="include sequences with ids matching pattern [default = %default]." )

    parser.add_option("-t", "--type", dest="type", type="choice",
                      choices = ("aa", "na"),
                      help="sequence type (aa or na) [%default]. This option determines which characters to use for masking [default = %default]."  )

    parser.add_option("-l", "--template-identifier", dest="template_identifier", type="string",
                      help="""template for numerical identifier [default = %default] for the operation --build-map. A %i is replaced by the position of the sequence in the file."""  )

    parser.set_defaults(
        methods = [],
        parameters = "",
        type = "na",
        aa_mask_chars = "xX",
        aa_mask_char = "x",
        na_mask_chars = "nN",
        na_mask_char = "n",
        gap_chars = "-.",
        gap_char = "-",
        template_identifier="ID%06i",
        ignore_errors = False,
        exclude = None,
        include = None,
        sample_proportion = None,
        )

    (options, args) = E.Start( parser )
    options.parameters = options.parameters.split(",")

    rx_include, rx_exclude = None, None
    if options.include: rx_include = re.compile( options.include )
    if options.exclude: rx_exclude = re.compile( options.exclude )
    
    iterator = FastaIterator.FastaIterator( options.stdin )

    nseq = 0

    map_seq2nid = {}

    if "apply-map" in options.methods:
        map_seq2nid = IOTools.ReadMap( open( options.parameters[0], "r") )
        del options.parameters[0]

    if options.type == "na":
        mask_chars = options.na_mask_chars
        mask_char = options.na_mask_char
    else:
        mask_chars = options.aa_mask_chars
        mask_char = options.aa_mask_char
        
    if "map-codons" in options.methods:
        map_codon2code = IOTools.ReadMap( open(options.parameters[0], "r") )
        del options.parameters[0]

    if "mask-soft" in options.methods:
        f = options.parameters[0]
        del options.parameters[0]
        hard_masked_iterator = FastaIterator.FastaIterator( open(f, "r") )
        
    if "mask-codons" in options.methods or "back-translate" in options.methods:

        ## open a second stream to read sequences from
        f = options.parameters[0]
        del options.parameters[0]
        
        other_iterator = FastaIterator.FastaIterator( open(f, "r") )

    ninput, noutput, nerrors, nskipped = 0, 0, 0, 0
    
    if "sample" in options.methods:
        if not options.sample_proportion:
            raise ValueError("specify a sample proportion" )
        sample_proportion = options.sample_proportion
    else:
        sample_proportion = None

    while 1:
        try:
            cur_record = iterator.next()
        except StopIteration:
            break

        if cur_record is None: break
        nseq += 1
        ninput += 1
        
        sequence = re.sub( " ", "", cur_record.sequence)
        l = len(sequence)

        if rx_include and not rx_include.search( cur_record.title ):
            nskipped += 1
            continue

        if rx_exclude and rx_exclude.search( cur_record.title ):
            nskipped += 1
            continue

        if sample_proportion:
            if random.random() > sample_proportion:
                continue

        for method in options.methods:

            if method == "translate":
                # translate such that gaps are preserved
                seq = []

                ls = len(re.sub( '[%s]' % options.gap_chars, "", sequence) )

                if ls % 3 != 0:
                    msg = "length of sequence %s (%i) not divisible by 3" % (cur_record.title, ls)
                    nerrors += 1
                    if options.ignore_errors:
                        options.stdlog.write("# ERROR: %s\n" % msg)
                        continue
                    else:
                        raise ValueError, msg

                for codon in [ sequence[x:x+3] for x in range(0, l, 3) ]:
                    aa = Genomics.MapCodon2AA( codon )
                    seq.append( aa )

                sequence = "".join(seq)

            elif method == "back-translate":
                ## translate from an amino acid alignment to codon alignment
                seq = []

                try:
                    other_record = other_iterator.next()
                except StopIteration:
                    raise "run out of sequences."                    

                if cur_record.title != other_record.title:
                    raise ValueError("sequence titles don't match: %s %s" % (cur_record.title, other_record.title))

                other_sequence = re.sub( "[ %s]" % options.gap_chars, "", other_record.sequence)

                if len(other_sequence) % 3 != 0:
                    raise ValueError, "length of sequence %s not divisible by 3" % (other_record.title)

                r = re.sub("[%s]" % options.gap_chars, "", sequence)
                if len(other_sequence) != len(r) * 3:
                    raise ValueError, "length of sequences do not match: %i vs %i" % (len(other_sequence), len(r))

                x = 0
                for aa in sequence:
                    if aa in options.gap_chars:
                        c = options.gap_char * 3 
                    else:
                        c = other_sequence[x:x+3]
                        x += 3
                    seq.append( c )

                sequence = "".join(seq)

            elif method == "pseudo-codons":            

                seq = []
                if l % 3 != 0:
                    raise ValueError, "length of sequence %s not divisible by 3" % (cur_record.title)

                for codon in [ sequence[x:x+3] for x in range(0, l, 3) ]:

                    aa = Genomics.MapCodon2AA( codon )
                    seq.append( aa )

                sequence = "   ".join(seq)

            elif method == "reverse-complement":            
                sequence = string.translate( sequence, string.maketrans("ACGTacgt", "TGCAtgca") )[::-1]

            elif method in ("mask-stops", "remove-stops"):
                c = []
                n = 0
                codon = []
                new_sequence = []

                if method == "mask-stops":
                    char = options.na_mask_char
                elif method == "remove-stops":
                    char = options.gap_char

                for x in sequence:

                    if x not in options.gap_chars:
                        codon.append( x.upper() )

                    c.append(x)

                    if len(codon) == 3:
                        codon = "".join(codon).upper()
                        ## mask all non-gaps
                        if Genomics.IsStopCodon( codon ):

                            for x in c:
                                if x in options.gap_chars:
                                    new_sequence.append( x )
                                else:
                                    new_sequence.append( char )
                        else:
                            new_sequence += c

                        c = []
                        codon = []

                new_sequence += c

                sequence = "".join(new_sequence)

            elif method == "mask-soft":
                # Get next hard masked record and extract sequence and length
                try:
                    cur_hm_record = hard_masked_iterator.next()
                except StopIteration:
                    break
                hm_sequence = re.sub( " ", "", cur_hm_record.sequence)
                lhm = len(hm_sequence)
                new_sequence = []
                
                # Check lengths of unmasked and soft masked sequences the same
                if l != lhm:
                    raise ValueError, "length of unmasked and hard masked sequences not identical for record %s" % (cur_record.title)
                
                # Check if hard masked seq contains repeat (N), if so replace N with lowercase sequence from unmasked version
                if sequence==hm_sequence:
                    pass
                else:
                    for x, y in itertools.izip_longest(sequence, hm_sequence):
                        if y=="N":
                            new_sequence += x.lower()
                        else:
                            new_sequence += x.upper()
                sequence = "".join(new_sequence)
                
            elif method == "map-codons":

                seq = []
                if l % 3 != 0:
                    raise ValueError, "length of sequence %s not divisible by 3" % (cur_record.title)

                for codon in [ sequence[x:x+3].upper() for x in range(0, l, 3) ]:

                    if codon not in map_codon2code:
                        aa = "X"
                    else:
                        aa = map_codon2code[ codon ]
                    seq.append( aa )

                sequence = "".join(seq)

            elif method == "interleaved-codons":

                seq = []
                if l % 3 != 0:
                    raise ValueError, "length of sequence %s not divisible by 3" % (cur_record.title)

                for codon in [ sequence[x:x+3] for x in range(0, l, 3) ]:

                    aa = Genomics.MapCodon2AA( codon )
                    seq.append( "%s:%s" % (aa, codon) )

                sequence = " ".join(seq)

            elif method == "translate-to-stop":
                seq = []

                for codon in [ sequence[x:x+3] for x in range(0, l, 3) ]:

                    if Genomics.IsStopCodon( codon ): break

                    aa = Genomics.MapCodon2AA( codon )
                    seq.append( aa )

                sequence = "".join(seq)

            elif method == "truncate-at-stop":
                seq = []

                for codon in [ sequence[x:x+3] for x in range(0, l, 3) ]:

                    if Genomics.IsStopCodon( codon ): break
                    seq.append( codon )

                sequence = "".join(seq)

            elif method == "remove-gaps":

                seq = []
                for s in sequence:
                    if s in options.gap_chars: continue
                    seq.append( s )

                sequence = "".join(seq)

            elif method == "upper":
                sequence = sequence.upper()

            elif method == "lower":
                sequence = sequence.lower()            

            elif method == "mark-codons":
                seq = []
                if l % 3 != 0:
                    raise ValueError, "length of sequence %s not divisible by 3" % (cur_record.title)

                sequence = " ".join([ sequence[x:x+3] for x in range(0, l, 3) ])

            elif method == "apply-map":
                id = re.match("^(\S+)", cur_record.title ).groups()[0]
                if id in map_seq2nid:
                    rest = cur_record.title[len(id):]
                    cur_record.title = map_seq2nid[id] + rest

            elif method == "build-map":
                ## build a map of identifiers
                id = re.match("^(\S+)", cur_record.title ).groups()[0]
                new_id = options.template_identifier % nseq
                if id in map_seq2nid:
                    raise "duplicate fasta entries - can't map those: %s" % id
                map_seq2nid[ id ] = new_id
                cur_record.title = new_id

            elif method == "mask-bias":
                masker= Masker.MaskerBias()
                sequence = masker( sequence )
                
            elif method == "mask-seg":
                masker= Masker.MaskerSeg()
                sequence = masker( sequence )

            elif method == "shuffle":
                s = list(sequence)
                random.shuffle(s)
                sequence = "".join(s)

            elif method == "mask-incomplete-codons":
                seq = list(sequence)
                for x in range(0,l,3):
                    nm = len(filter( lambda x: x in mask_chars, seq[x:x+3]))
                    if 0 < nm < 3:
                        seq[x:x+3] = [ mask_char ] * 3
                sequence = "".join(seq)
                
            elif method == "mask-codons":
                ## mask codons based on amino acids given as reference sequences.
                other_record = other_iterator.next()

                if other_record is None:
                    raise ValueError("run out of sequences.")

                if cur_record.title != other_record.title:
                    raise ValueError("sequence titles don't match: %s %s" % (cur_record.title, other_record.title))

                other_sequence = re.sub( " ", "", other_record.sequence)

                if len(other_sequence) * 3 != len(sequence):
                    raise ValueError("sequences for %s don't have matching lengths %i - %i" % (cur_record.title, len(other_sequence) * 3, len(sequence)))

                seq = list(sequence)
                c = 0
                for x in other_sequence:
                    if x in options.aa_mask_chars:
                        if x.isupper():
                            seq[c:c+3] = [ options.na_mask_char.upper() ] * 3
                        else:
                            seq[c:c+3] = [ options.na_mask_char.lower() ] * 3
                    c += 3

                sequence = "".join(seq)

        options.stdout.write( ">%s\n%s\n" % (cur_record.title, sequence) )
        noutput += 1

    if "build-map" in options.methods:
        p = options.parameters[0]
        if p:
            outfile = open(p, "w")
        else:
            outfile = options.stdout

        outfile.write("old\tnew\n")            
        for old_id, new_id in map_seq2nid.items():
            outfile.write("%s\t%s\n" % (old_id, new_id) )
        if p:
            outfile.close()

    E.info( "ninput=%i, noutput=%i, nskipped=%i, nerrors=%i" % (ninput, noutput, nskipped, nerrors) )
        
    E.Stop()
Example No. 51
def rot13(s):
    rotate = string.maketrans("ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz", "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm")
    return ''.join(string.translate(s, rotate))
Example No. 52
 def 英文标点符号转中文标点符号(self,string):
     # (function name: "convert English punctuation to Chinese punctuation")
     E_pun = u',.!?[]()<>"\''
     C_pun = u',。!?【】()《》“‘'
     table = {ord(f): ord(t) for f, t in zip(E_pun, C_pun)}
     return string.translate(table)
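A dict of ordinals like the one above is exactly the table format Python 3's str.translate expects, so the method ports unchanged; a standalone sketch (to_chinese_punctuation is a hypothetical English-named wrapper):

E_pun = u',.!?[]()<>"\''
C_pun = u',。!?【】()《》“‘'
TABLE = {ord(f): ord(t) for f, t in zip(E_pun, C_pun)}

def to_chinese_punctuation(text):
    # Swap each ASCII punctuation mark for its full-width counterpart.
    return text.translate(TABLE)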
Example No. 53
 def get_attr_name(self, long_option):
     """Translate long option name 'long_option' to the form it
     has as an attribute of some object: ie., translate hyphens
     to underscores."""
     return string.translate(long_option, longopt_xlate)
Example No. 54
def cp1252_to_iso8859(data):
    return string.translate(data, cp1252_iso_tbl)
Example No. 55
def p_name(name):
    return '_' + string.translate(name, name_trans) + '_Permission'
Example No. 56
import os
import string

rot13 = string.maketrans(
    "ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz",
    "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm")

fi = open("cities.txt", 'r')
fo = open("t.txt", 'w')
for l in fi.readlines():
    w_line = ""
    for x in l.split(','):
        x = x.strip()
        i = x[-1:]
        x = x[:-1] + string.translate(i, rot13)
        w_line += ',' + x
    fo.write(w_line.strip(',') + '\n')
Example No. 57
def remove_punctuation(string):
    return string.translate(table)
Example No. 58
def make_supertranscript(seq, contig, translation_table, blocks_out, fasta_out,
                         transcripts_out, current_gene_id, starts, ends,
                         transcript_ids, orientations):
    current_gene_id = current_gene_id.replace("\"", "")
    sstarts = sorted(starts.keys())
    sends = sorted(ends.keys())
    i = j = 0
    current_transcripts = set()
    blocks = []  # (start_position, end_position, [transcript_ids])

    last_position = -1
    while i < len(sstarts) and j < len(sends):
        if sstarts[i] == sends[j]:
            if last_position != sstarts[i]:
                blocks.append(
                    (last_position, sstarts[i] - 1, set(current_transcripts)))
            for k in starts[sstarts[i]]:
                current_transcripts.add(transcript_ids[k])
            blocks.append((sstarts[i], sstarts[i], set(current_transcripts)))
            for k in ends[sends[j]]:
                current_transcripts.remove(transcript_ids[k])
            last_position = sstarts[i] + 1
            i += 1
            j += 1
        elif sstarts[i] < sends[j]:
            if last_position > 0 and last_position != sstarts[i]:
                blocks.append(
                    (last_position, sstarts[i] - 1, set(current_transcripts)))
            for k in starts[sstarts[i]]:
                current_transcripts.add(transcript_ids[k])
            last_position = sstarts[i]
            i += 1
        elif sstarts[i] > sends[j]:
            blocks.append((last_position, sends[j], set(current_transcripts)))
            for k in ends[sends[j]]:
                current_transcripts.remove(transcript_ids[k])
            last_position = sends[j] + 1
            j += 1

    while j < len(sends):  # closing blocks
        blocks.append((last_position, sends[j], set(current_transcripts)))
        for k in ends[sends[j]]:
            current_transcripts.remove(transcript_ids[k])
        last_position = sends[j] + 1
        j += 1

    ori = set(orientations)
    if len(ori) > 1:
        exit("error, one gene contains exons in both orientations")

    current_position = 1
    ori = ori.pop()
    blocks_buffer = ""
    fasta_buffer = ""
    last_transcripts = None
    transcripts_to_output = {}
    next_opening = 1
    transcript_buffer = ""

    if ori == "+":
        offset = sstarts[0]
        fasta_header_exons = ">" + current_gene_id + " loc:" + contig + "|" + str(
            blocks[0][0]) + "-" + str(blocks[-1][1]) + "|+ "
        fasta_header_exons += "exons:" + str(blocks[0][0])
        fasta_header_segs = " segs:1"

        for block in blocks:  # (start_position, end_position, [transcript_ids])
            if len(block[2]) > 0:
                end_position = current_position + block[1] - block[0]
                if last_transcripts and block[2] != last_transcripts:
                    blocks_buffer += current_gene_id + "\trtracklayer\texon\t" + str(
                        next_opening
                    ) + "\t" + str(
                        current_position - 1
                    ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + current_gene_id + "\"; ID \"" + current_gene_id + "\"\n"
                    for transcript in last_transcripts - block[2]:
                        transcripts_to_output[transcript].append(
                            current_position - 1)
                    for transcript in block[2] - last_transcripts:
                        if transcript not in transcripts_to_output:
                            transcripts_to_output[transcript] = [
                                current_position
                            ]
                        else:
                            transcripts_to_output[transcript].append(
                                current_position)
                    last_transcripts = block[2]
                    next_opening = current_position
                elif not last_transcripts:
                    last_transcripts = block[2]
                    for transcript in last_transcripts:
                        transcripts_to_output[transcript] = [1]
                tmp_fasta = get_fasta(seq, contig, block[0], block[1])
                if not tmp_fasta:
                    print("contig " + contig +
                          " from annotation does not exist in the fasta file.")
                    return
                fasta_buffer += tmp_fasta
                current_position = end_position + 1
            else:  # gap
                offset += block[1] - block[0] + 1
                fasta_header_exons += "-" + str(block[0] - 1) + "," + str(
                    block[1] + 1
                )  # -1 and +1 because the exons end and start at positions around the gap
                fasta_header_segs += "-" + str(current_position -
                                               1) + "," + str(current_position)

        for transcript in last_transcripts:
            transcripts_to_output[transcript].append(current_position - 1)
        for transcript in sorted(transcripts_to_output):
            for i in xrange(0, len(transcripts_to_output[transcript]), 2):
                transcript_buffer += current_gene_id + "\tsuperTranscript\texon\t" + str(
                    transcripts_to_output[transcript][i]
                ) + "\t" + str(
                    transcripts_to_output[transcript][i + 1]
                ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + transcript + "\"\n"
        transcripts_out.write(transcript_buffer)

        blocks_buffer += current_gene_id + "\trtracklayer\texon\t" + str(
            next_opening
        ) + "\t" + str(
            end_position
        ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + current_gene_id + "\"; ID \"" + current_gene_id + "\"\n"
        blocks_out.write(blocks_buffer)

        fasta_header_segs += "-" + str(current_position - 1)
        write_fasta(fasta_out, fasta_header_exons + "-" + str(blocks[-1][1]),
                    fasta_header_segs, fasta_buffer, seq.line_length)
    elif ori == "-":
        offset = sends[-1]
        fasta_header = ">" + current_gene_id + " loc:" + contig + "|" + str(
            blocks[0][0]) + "-" + str(blocks[-1][1]) + "|- "
        fasta_header_exons = ""
        fasta_header_segs = " segs:1"
        for i in xrange(len(blocks) - 1, -1, -1):
            block = blocks[
                i]  # (start_position, end_position, [transcript_ids])
            if len(block[2]) > 0:
                end_position = current_position + block[1] - block[0]
                if last_transcripts and block[2] != last_transcripts:
                    blocks_buffer += current_gene_id + "\trtracklayer\texon\t" + str(
                        next_opening
                    ) + "\t" + str(
                        current_position - 1
                    ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + current_gene_id + "\"; ID \"" + current_gene_id + "\"\n"
                    for transcript in last_transcripts - block[2]:
                        transcripts_to_output[transcript].append(
                            current_position - 1)
                    for transcript in block[2] - last_transcripts:
                        if transcript not in transcripts_to_output:
                            transcripts_to_output[transcript] = [
                                current_position
                            ]
                        else:
                            transcripts_to_output[transcript].append(
                                current_position)
                    last_transcripts = block[2]
                    next_opening = current_position
                elif not last_transcripts:
                    last_transcripts = block[2]
                    for transcript in last_transcripts:
                        transcripts_to_output[transcript] = [1]
                tmp_fasta = get_fasta(seq, contig, block[0], block[1])
                if not tmp_fasta:
                    print("contig " + contig +
                          " from annotation does not exist in the fasta file.")
                    return
                fasta_buffer = tmp_fasta + fasta_buffer
                current_position = end_position + 1
            else:  # gap
                offset += block[1] - block[0] + 1
                fasta_header_exons = "-" + str(block[0] - 1) + "," + str(
                    block[1] + 1) + fasta_header_exons
                fasta_header_segs += "-" + str(current_position -
                                               1) + "," + str(current_position)

        for transcript in last_transcripts:
            transcripts_to_output[transcript].append(current_position - 1)
        for transcript in sorted(transcripts_to_output):
            for i in xrange(0, len(transcripts_to_output[transcript]), 2):
                transcript_buffer += current_gene_id + "\tsuperTranscript\texon\t" + str(
                    transcripts_to_output[transcript][i]
                ) + "\t" + str(
                    transcripts_to_output[transcript][i + 1]
                ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + transcript + "\"\n"
        transcripts_out.write(transcript_buffer)

        blocks_buffer += current_gene_id + "\trtracklayer\texon\t" + str(
            next_opening
        ) + "\t" + str(
            end_position
        ) + "\t.\t+\t.\tgene_id \"" + current_gene_id + "\"; transcript_id \"" + current_gene_id + "\"; ID \"" + current_gene_id + "\"\n"
        blocks_out.write(blocks_buffer)

        fasta_header_exons = fasta_header + "exons:" + str(
            blocks[0][0]) + fasta_header_exons + "-" + str(
                blocks[-1][1])  # [:fasta_header_exons.rfind(",")]
        fasta_header_segs += "-" + str(current_position - 1)
        write_fasta(
            fasta_out, fasta_header_exons, fasta_header_segs,
            string.translate(str(fasta_buffer[::-1]), translation_table),
            seq.line_length)
        # write_fasta(fasta_out, fasta_header_exons, fasta_header_segs, fasta_buffer[::-1].translate(translation_table), seq.line_length)
    else:
        exit("error, invalid orientation " + ori)
Example No. 59
def _hmac(msg, key):
    if len(key) > 64:
        key = sha.new(key).digest()
    ki = (translate(key, _itrans) + _ipad)[:64]  # inner
    ko = (translate(key, _otrans) + _opad)[:64]  # outer
    return sha.new(ko + sha.new(ki + msg).digest()).digest()
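The hand-rolled construction above (inner/outer key pads applied via translation tables) is HMAC-SHA1 per RFC 2104 for SHA-1's 64-byte block; the stdlib hmac module computes the same digest. A sketch:

import hashlib
import hmac

def hmac_sha1(msg, key):
    # Handles the key hashing and ipad/opad padding that the manual version does by hand.
    return hmac.new(key, msg, hashlib.sha1).digest()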
Example No. 60
def iso8859_to_cp850(data):
    return string.translate(data, iso_cp850_tbl)