def testAppendToBits(self):
    a = Bits(BitArray())
    with self.assertRaises(AttributeError):
        a.append('0b1')
    self.assertEqual(type(a), Bits)
    b = bitstring.ConstBitStream(bitstring.BitStream())
    self.assertEqual(type(b), bitstring.ConstBitStream)
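The failing append call above is the point of the test: in bitstring, Bits is the immutable type, while its subclass BitArray adds the mutating methods. A minimal sketch of the split:

from bitstring import Bits, BitArray

a = BitArray('0b1')
a.append('0b0')      # BitArray is mutable
print(a.bin)         # -> 10
frozen = Bits(a)     # immutable copy; has no append()
print(frozen.bin)    # -> 10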
Example #2
    def read_spk_folder(spk_folder, bin_size=1):
        """
        Loads spike times from all spk files in a given folder.
        The j-th item in the list corresponds to the j-th neuron.
        It is the 1d array of spike times (microsec) for that neuron.

        Parameters
        ----------
        spk_folder : str
            Path containing spk file names
        bin_size : int, optional
            Bin size in milliseconds (default 1)

        Returns
        -------
        spikes : Spikes
            Spikes object containing the binned spike times
        """
        from bitstring import Bits
        neuron_to_file = []
        time_stamps = []
        bin_size = bin_size or 1
        fns = os.listdir(spk_folder)
        for fn in fns:
            ext = os.path.splitext(fn)[1]
            if ext in ('.spk', ):  # Blanche spike format
                neuron_to_file.append(fn)
                f = open(os.path.join(spk_folder, fn), 'rb')
                p = Bits(f)
                fmt = str(p.length // 64) + ' * (intle:64)'
                time_stamps.append(p.unpack(fmt))
        spikes = SpkReader.load_from_spikes_times(time_stamps, bin_size=bin_size)
        return Spikes(spikes)
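The unpack pattern above, sketched standalone; the assumed file layout is a flat run of little-endian 64-bit integers:

from bitstring import Bits

raw = (1234).to_bytes(8, 'little') + (5678).to_bytes(8, 'little')
p = Bits(bytes=raw)
fmt = str(p.length // 64) + ' * (intle:64)'
print(p.unpack(fmt))  # -> [1234, 5678]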
Example #3
def readCommand(hexbits):
    bits = Bits(bytes=HexToByte(hexbits))
    # print bits
    alarm, state, data1, data2, checksum = bits.unpack("uint:4, uint:4, uint:8, uint:8, uint:8")

    dic = {"state": state, "alarm": alarm, "data1": data1, "data2": data2, "checksum": checksum}
    return dic
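The same nibble-and-byte unpack, sketched standalone with bitstring's hex initialiser in place of the HexToByte helper:

from bitstring import Bits

bits = Bits(hex='0x12345678')
alarm, state, data1, data2, checksum = bits.unpack("uint:4, uint:4, uint:8, uint:8, uint:8")
print(alarm, state, data1, data2, checksum)  # -> 1 2 52 86 120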
Example #4
def uncompress_golomb_coding(coded_bytes, hash_length, M):
    """Given a bytestream produced using golomb_coded_bytes, uncompress it."""
    ret_list = []
    instream = BitStream(
        bytes=coded_bytes, length=len(coded_bytes) * 8)
    hash_len_bits = hash_length * 8
    m_bits = int(math.log(M, 2))
    # First item is a full hash value.
    prev = instream.read("bits:%d" % hash_len_bits)
    ret_list.append(prev.tobytes())

    while (instream.bitpos + m_bits) <= instream.length:
        # Read Unary-encoded value.
        read_prefix = 0
        curr_bit = instream.read("uint:1")
        while curr_bit == 1:
            read_prefix += 1
            curr_bit = instream.read("uint:1")
        assert curr_bit == 0

        # Read r, assuming M bits were used to represent it.
        r = instream.read("uint:%d" % m_bits)
        curr_diff = read_prefix * M + r
        curr_value_int = prev.uint + curr_diff
        curr_value = Bits(uint=curr_value_int, length=hash_len_bits)
        ret_list.append(curr_value.tobytes())
        prev = curr_value

    return ret_list
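For context, a minimal sketch of the encoding side this decoder assumes; this is not the original golomb_coded_bytes but a hypothetical reconstruction for M a power of two and sorted fixed-length hashes, storing each delta as a unary quotient plus a log2(M)-bit remainder:

import math
from bitstring import BitArray, Bits

def golomb_coded_bytes_sketch(hashes, hash_length, M):
    m_bits = int(math.log(M, 2))
    out = BitArray()
    prev = Bits(bytes=hashes[0])          # first hash stored verbatim
    out.append(prev)
    for h in hashes[1:]:
        curr = Bits(bytes=h)
        q, r = divmod(curr.uint - prev.uint, M)
        out.append(Bits(bin='1' * q + '0'))       # unary-coded quotient
        out.append(Bits(uint=r, length=m_bits))   # fixed-width remainder
        prev = curr
    # note: the zero padding added by tobytes() can decode as a spurious
    # zero delta when log2(M) is small
    return out.tobytes()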
Example #5
    def read_spk_files(spk_files, bin_size=1):
        """
        Loads spike times from a list of spk files.
        The j-th item in the list corresponds to the j-th neuron.
        It is the 1d array of spike times (microsec) for that neuron.

        Parameters
        ----------
        spk_files : list of str
            List of strings containing spk file names
        bin_size : int, optional
            Bin size in milliseconds (default 1)

        Returns
        -------
        spikes : Spikes
            Spikes object containing the binned spike times
        """
        from bitstring import Bits
        neuron_to_file = []
        time_stamps = []
        bin_size = bin_size or 1

        for fn in spk_files:
            neuron_to_file.append(fn)
            f = open(fn, 'rb')
            p = Bits(f)
            fmt = str(p.length // 64) + ' * (intle:64)'
            time_stamps.append(p.unpack(fmt))
        spikes = SpkReader.load_from_spikes_times(time_stamps, bin_size=bin_size)
        return Spikes(spikes)
Example #6
def testUnpack(self):
    s = Bits('0b111000111')
    x, y = s.unpack('3, pad:3, 3')
    self.assertEqual((x, y), (7, 7))
    x, y = s.unpack('2, pad:2, bin')
    self.assertEqual((x, y), (3, '00111'))
    x = s.unpack('pad:1, pad:2, pad:3')
    self.assertEqual(x, [])
Example #7
def to_comp1(n, v):
    """Return the n-bit ones'-complement representation of v as a binary string."""
    bits_raw = Bits(uint=abs(v), length=n)
    if v < 0:
        bits = (~bits_raw).bin
    else:
        bits = bits_raw.bin
    return bits
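Usage sketch:

print(to_comp1(4, 3))   # -> 0011
print(to_comp1(4, -3))  # -> 1100  (bitwise inverse of 0011)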
Example #8
def solve(par):
    N, K = par
    results = []
    for i in range(1 << N):
        b = Bits(int=i, length=N + 1)
        if b.count(1) == K:
            results.append(b.bin[1:])
    return '\n'.join(results)
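Usage sketch; the leading bit of the (N+1)-long two's-complement string is sliced off, leaving the N-bit combinations with exactly K ones:

print(solve((3, 2)))
# 011
# 101
# 110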
Example #9
def getStatusByte(byte1):
    """
    Gets Two First Bytes, and returns a dictionary with:
    Command
    SetGroup
    Address
    """

    bits8 = Bits(bytes=byte1)
    status1, status2 = bits8.unpack("uint:4,uint:4")
    return dict(status1=getSt3st0(status1), status2=getSt7st4(status2))
Example #10
def uncompress_golomb_coding(coded_bytes, hash_length, M):
    ret_list = []
    instream = BitStream(
            bytes=coded_bytes, length=len(coded_bytes) * 8)
    hash_len_bits = hash_length * 8
    m_bits = int(math.log(M, 2))
    prev = instream.read("bits:%d" % hash_len_bits)
    ret_list.append(prev.tobytes())
    while instream.bitpos < instream.length:
        read_prefix = 0
        curr_bit = instream.read("uint:1")
        while curr_bit == 1:
            read_prefix += 1
            curr_bit = instream.read("uint:1")
        assert curr_bit == 0
        r = instream.read("uint:%d" % m_bits)
        curr_diff = read_prefix * M + r
        curr_value_int = prev.uint + curr_diff
        curr_value = Bits(uint=curr_value_int, length=hash_len_bits)
        ret_list.append(curr_value.tobytes())
        prev = curr_value

    return ret_list
Example #11
def opcode(self, o):
    self._opcode = Bits(uint=o, length=4)
Example #12
def assemble(self):
    return Bits("0b0101, 12*(0b0)").bin
Example #13
def validateIncoming(byteStr):
    bits32 = Bits(bytes=byteStr)
    first, second, third, fourth = bits32.unpack("bytes:1,bytes:1,bytes:1,bytes:1")
    check = countCheckSumIncoming(first, second, third)
    assert str(check) == "0x" + str(ByteToHex(fourth)).lower()
Example #14
def encode(self, value):
    '''
    :param value: value to encode
    '''
    return Bits(bytes=strToBytes(self.fmt % value))
Example #15
    def build_packet(self):
        """Build the complete encoded packet ready for transmission and return it as a BitArray"""
        packet_words = []

        # start with the 3 word serial number
        packet_words.extend([Bits(bin=s) for s in self.serial])

        # build the first command word
        cmd1 = BitArray()
        cmd1.append('0b1' if self.pilot else '0b0')  # 1 pilot bit
        cmd1.append(Bits(uint=self.light, length=3))  # 3 light bits
        cmd1.append('0b00')  # 2 zero bits
        cmd1.append('0b1' if self.thermostat else '0b0')  # 1 thermostat bit
        cmd1.append('0b1' if self.power else '0b0')  # 1 power bit
        cmd1.append('0x0')  # 4 zero bits; word[0:9] keeps the first as padding
        packet_words.append(cmd1)

        # build the second command word
        cmd2 = BitArray()
        cmd2.append('0b1' if self.front else '0b0')  # 1 front bit
        cmd2.append(Bits(uint=self.fan, length=3))  # 3 fan bits
        cmd2.append('0b1' if self.aux else '0b0')  # 1 aux bit
        cmd2.append(Bits(uint=self.flame, length=3))  # 3 flame bits
        cmd2.append('0x0')  # 4 zero bits; word[0:9] keeps the first as padding
        packet_words.append(cmd2)

        # calculate the first ecc word
        ecc1 = BitArray()
        ecc1_high = (0xD ^ cmd1[0:4].uint ^ (cmd1[0:4].uint << 1) ^
                     (cmd1[4:8].uint << 1)) & 0xF
        ecc1_low = cmd1[0:4].uint ^ cmd1[4:8].uint
        ecc1.append(Bits(uint=ecc1_high, length=4))  # 4 high ecc bits
        ecc1.append(Bits(uint=ecc1_low, length=4))  # 4 low ecc bits
        ecc1.append('0x0')  # 4 zero bits; word[0:9] keeps the first as padding
        packet_words.append(ecc1)

        # calculate the second ecc word
        ecc2 = BitArray()
        ecc2_high = (cmd2[0:4].uint ^ (cmd2[0:4].uint << 1) ^
                     (cmd2[4:8].uint << 1)) & 0xF
        ecc2_low = cmd2[0:4].uint ^ cmd2[4:8].uint ^ 0x7
        ecc2.append(Bits(uint=ecc2_high, length=4))  # 4 high ecc bits
        ecc2.append(Bits(uint=ecc2_low, length=4))  # 4 low ecc bits
        ecc2.append('0x0')  # 4 zero bits; word[0:9] keeps the first as padding
        packet_words.append(ecc2)

        # convert the packet array to a bit string for encoding
        packet_string = ''
        for word in packet_words:
            packet_string += 'S'  # sync symbol
            packet_string += '1'  # start guard bit
            packet_string += word[0:9].bin  # data
            parity = word.count('0x1') % 2  # calculate parity on all 9 bits
            packet_string += Bits(uint=parity, length=1).bin  # parity bit
            packet_string += '1'  # end guard bit 1
        packet_string += 'Z' * 9  # zero padding at the end, required for burst separation

        logging.debug('packet string: %s', packet_string)
        logging.debug('packet string length: %d', len(packet_string))
        for p in packet_string.split('S'):
            if len(p) == 0:
                continue
            print('S{} {} {} {}'.format(p[0:1], p[1:5], p[5:9], p[9:12]))

        # encode the packet in extended thomas manchester codes with sync and zero codes
        manchester_codes = {'S': '11', '0': '01', '1': '10', 'Z': '00'}
        packet_array = [manchester_codes[b] for b in packet_string]
        packet = BitArray()
        for b in packet_array:
            packet.append(Bits(bin=b))

        # return the result
        logging.debug('manchester encoded packet: %s', packet.bin)
        logging.debug('length: %d', len(packet.bin))
        return packet
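A standalone sketch of the extended Thomas Manchester mapping used in the final encoding step, with the same symbol table:

from bitstring import BitArray, Bits

manchester_codes = {'S': '11', '0': '01', '1': '10', 'Z': '00'}
encoded = BitArray()
for symbol in 'S101Z':
    encoded.append(Bits(bin=manchester_codes[symbol]))
print(encoded.bin)  # -> 1110011000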
Example #16
def make_exon_alignment(cursor, ensembl_db_name, human_exon_id, human_exon_known, mitochondrial, 
                        min_similarity,  flank_length):

    sequence_pep = {}
    sequence_dna = {}
    shortest_l = -1 # Uninitialized  leading padding length
    shortest_r = -1 # Uninitialized trailing padding length

    pep_aln_length = 0
    dna_aln_length = 0
    # find all other exons that map to the human exon
    maps    = get_maps(cursor, ensembl_db_name, human_exon_id, human_exon_known)
    maps    = filter (lambda m: not m.exon_id_2 is None, maps)
    maps_sw = filter (lambda m: m.source=='sw_sharp' or m.source=='usearch', maps)

    for map in maps:

        if map.similarity < min_similarity: continue
        # get the raw (unaligned) sequence for the exon that maps onto human
        exon_seqs = get_exon_seqs(cursor, map.exon_id_2, map.exon_known_2, ensembl_db_name[map.species_2])
        if (not exon_seqs):
            print " exon_seqs for" , map.source
            exit(1)
            continue
        [pepseq, pepseq_transl_start, 
         pepseq_transl_end, left_flank, right_flank, dna_seq] = exon_seqs[1:]

        if     len(pepseq)<3: continue
        pepseq_noX = pepseq.replace ('X','')
        if len(pepseq_noX)<3: continue
       

        # check
        dnaseq  = Seq (dna_seq[pepseq_transl_start:pepseq_transl_end], generic_dna)
        if (mitochondrial):
            pepseq2 = dnaseq.translate(table="Vertebrate Mitochondrial").tostring()
        else:
            pepseq2 = dnaseq.translate().tostring()
        

        if (not pepseq == pepseq2):
            continue
            
        # inflate the compressed sequence
        if not map.bitmap:
            continue

        bs = Bits(bytes=map.bitmap)
        if (not bs.count(1) == len(pepseq)): continue # check bitmap has correct number of 1s
        usi = iter(pepseq)
        #reconst_pepseq = "".join(('-' if c=='0' else next(usi) for c in bs.bin))
        reconst_pepseq = ''
        for c in bs.bin:
            if c == '0': reconst_pepseq += '-'
            else:        reconst_pepseq += next(usi)

        # come up with a unique name for this sequence
        species       = map.species_2
        sequence_name = species + "_" + str(map.exon_id_2)+"_"+str(map.exon_known_2)

        if reconst_pepseq: 
            sequence_pep[sequence_name] = reconst_pepseq
            pep_aln_length = len(reconst_pepseq)

            reconst_ntseq = expand_pepseq (reconst_pepseq, exon_seqs[1:], flank_length)
            if reconst_ntseq: 
                sequence_dna[sequence_name] = reconst_ntseq
                dna_aln_length = len(reconst_ntseq)

    # strip common gaps
    sequence_stripped_pep = strip_gaps (sequence_pep)
    if not sequence_stripped_pep:  
        c=inspect.currentframe()
        print " in %s:%d" % ( c.f_code.co_filename, c.f_lineno)
        exit(1)
    # strip common gaps
    sequence_stripped_dna = strip_gaps (sequence_dna)
    if not sequence_stripped_dna:  
        c=inspect.currentframe()
        print " in %s:%d" % ( c.f_code.co_filename, c.f_lineno)
        exit(1)

    return [sequence_stripped_pep, sequence_stripped_dna]
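The bitmap-inflation idiom at the heart of this function, sketched standalone: each '1' bit consumes the next residue of the ungapped peptide, each '0' bit emits an alignment gap.

from bitstring import Bits

bitmap = Bits(bin='101100')
usi = iter('MKV')  # ungapped sequence; bitmap.count(1) residues
print(''.join(next(usi) if c == '1' else '-' for c in bitmap.bin))  # -> M-KV--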
Example #17
def testFindAll(self):
    a = Bits('0b0010011')
    b = list(a.findall([1]))
    self.assertEqual(b, [2, 5, 6])
Example #18
    def test_toilet(self):
        name = 'Ftr_Toilet'

        sys_actor = Actor('EventFlowSystemActor')
        sys_actor.register_action(
            Action(
                ('EventFlowSystemActor', ''),
                'EventFlowActionWaitFrame',
                [Param('WaitFrame', IntType)],
            ))
        sys_actor.actions['EventFlowActionWaitFrame'].mark_used()

        player_actor = Actor('Player')
        player_actor.register_action(
            Action(
                ('Player', ''),
                'EventFlowActionOpenMessageWindow',
                [
                    Param('MessageID', StrType),
                    Param('IsCloseMessageWindow', BoolType)
                ],
            ))
        player_actor.register_action(
            Action(
                ('Player', ''),
                'EventFlowActionPlayerClearFoodPowerup',
                [],
            ))
        player_actor.actions['EventFlowActionOpenMessageWindow'].mark_used()
        player_actor.actions[
            'EventFlowActionPlayerClearFoodPowerup'].mark_used()

        nodes: List[Node] = [
            ActionNode(
                'Event0',
                player_actor.actions['EventFlowActionOpenMessageWindow'], {
                    'MessageID': TypedValue(StrType, 'TalkFtr/FTR_Toilet:001'),
                    'IsCloseMessageWindow': TypedValue(BoolType, False),
                }),
            ActionNode(
                'Event2',
                player_actor.actions['EventFlowActionPlayerClearFoodPowerup'],
                {}),
            ActionNode('Root', sys_actor.actions['EventFlowActionWaitFrame'], {
                'WaitFrame': TypedValue(IntType, 30),
            }),
            ActionNode('Event4', sys_actor.actions['EventFlowActionWaitFrame'],
                       {
                           'WaitFrame': TypedValue(IntType, 33),
                       }),
            RootNode('Root', []),
        ]
        nodes[1].add_out_edge(nodes[3])
        nodes[2].add_out_edge(nodes[1])
        nodes[3].add_out_edge(nodes[0])
        nodes[4].add_out_edge(nodes[2])

        with open('tests/bfevfl/Ftr_Toilet.bfevfl', 'rb') as f:
            expected = f.read()

        file = File('Ftr_Toilet', [sys_actor, player_actor], nodes)
        self.assertEqual(file.prepare_bitstream(), Bits(expected))
Example #19
import os
import sys

from bitstring import Bits

sys.path.append(os.curdir)

from brangetree.util import iter_zipped_blocks, natural_sort

if len(sys.argv) < 2:
    print("Expected one or more filenames")
    raise SystemExit

paths = sys.argv[1:]
natural_sort(paths)

for filename in paths:
    if not os.path.isfile(filename):
        print("Not found:", filename)
        continue
    fsize = os.path.getsize(filename)

    size = 0
    filled = 0
    for block in iter_zipped_blocks(filename):
        size += len(block) * 8
        filled += Bits(block).count(1)

    print(filename, fsize, size, filled, round(filled / size * 100))
Example #20
import warnings
from typing import Tuple, Set, Optional, List

from bitstring import Bits

from stegano.textanalyser import DEFAULT_ANALYSIS_FILE
from stegano.textanalyser import DEFAULT_SAMPLE_FILE
from stegano.textanalyser import TextAnalyser

Frequency = int
Symbol = Tuple[str, Frequency]
StringDefinitions = Set[Symbol]

DEFAULT_TREE_FILE = "..\\sample\\tree_article.json"

zero_bit = Bits(bin="0")
one_bit = Bits(bin="1")


class HuffmanTree:
    def __init__(self,
                 left=None,
                 right=None,
                 value: Symbol = None,
                 path_code: Bits = None):
        self.left = left
        self.right = right
        self.value = value
        self.path_code = path_code

    def __eq__(self, other):
Example #21
def encode_bits_as_strings(tree: HuffmanTree,
                           bits: Bits,
                           string_prefix: str = "") -> Tuple[Bits, str]:
    """
    Given a bit stream and a Huffman tree, return the appropriate
    string of
    symbols.

    The output will match the statistical distribution of the
    sample it was made
    with as much as possible, although limited by the necessity of an
    unambiguous HuffmanTree structure.

    If the Huffman tree does not have path bits to match the input
    exactly, it
    will append 0s until the function can complete.

    :param tree: a Huffman tree with path bits allocated
    :param bits: the input bits
    :param string_prefix: the so-far accumulated string. Leave
    empty when
    calling manually
    :return: a Tuple of the remaining bits and the accumulated
    string made up
    of symbols in the Huffman tree
    """
    if bits is None or bits == Bits():
        return Bits(), string_prefix

    if tree.left is not None and tree.right is not None:
        # This tree has subtrees
        left_tree = tree.left[1]
        right_tree = tree.right[1]

        if left_tree.path_code is None or right_tree.path_code is None:
            raise HuffmanError(
                "When encoding bits as strings, a node was missing "
                "a path code")
        else:
            if bits.startswith(left_tree.path_code):
                remaining_bits, accumulated_string = encode_bits_as_strings(
                    left_tree, bits, string_prefix)
            elif bits.startswith(right_tree.path_code):
                remaining_bits, accumulated_string = encode_bits_as_strings(
                    right_tree, bits, string_prefix)
            else:
                # Binary sequence does not match a leaf value; pad with 0s
                padded_bits = bits + zero_bit
                return padded_bits, string_prefix

            if tree.path_code is None:
                # This tree is a root node
                if bits is None:
                    # We are out of bits, so we can return the final string
                    return remaining_bits, accumulated_string
                else:
                    # Continue recursively processing the remaining bits
                    return encode_bits_as_strings(tree, remaining_bits,
                                                  accumulated_string)
            else:
                return remaining_bits, accumulated_string
    elif tree.left is None and tree.right is None:
        # This tree is a leaf node
        if tree.path_code is None:
            raise HuffmanError("When encoding bits as strings, a leaf node was"
                               " missing a path code")
        else:
            if bits.startswith(tree.path_code):
                accumulated_string = string_prefix + tree.value[0]
                if bits == tree.path_code:
                    remaining_bits = None
                else:
                    remaining_bits = bits[tree.path_code.length:]
                return remaining_bits, accumulated_string
            else:
                warnings.warn("When encoding bits as strings, some unencodable"
                              " bits were left over")
                return bits, string_prefix
    else:
        raise HuffmanError(
            "The given Huffman tree contained a node with exactly 1 "
            "child tree")
Example #22
def _mutate(self):
    new_val = BitArray(self._default_value).copy()
    start, end = self._start_end()
    new_val.invert(range(start, end))
    self.set_current_value(Bits(new_val))
Example #23
def evaluate(self, s: Bits) -> Bits:
    random.seed(a=s.uint)  # seed the PRNG from the given bits
    return Bits(uint=random.getrandbits(2 * n), length=2 * n)
Example #24
def create(cls, value, masklen, bitlen, offset):
    field = Bits.__new__(cls, uint=value, length=masklen)
    field.__offset = offset
    field.__bitlen = bitlen
    return field
Example #25
def main():


    no_threads = 1
    special    = None

    if len(sys.argv) > 1 and  len(sys.argv)<3:
        print "usage: %s <set name> <number of threads> " % sys.argv[0]
        exit(1)
    elif len(sys.argv)==3:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_threads = int(sys.argv[2])

    db  = connect_to_mysql()
    cfg = ConfigurationReader()
    cursor = db.cursor()

    # find db ids and common names for each species db
    [all_species, ensembl_db_name] = get_species (cursor)
    species                        = 'homo_sapiens'
    switch_to_db (cursor,  ensembl_db_name[species])

    if special:
        print "using", special, "set"
        gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
    else:
        print "using all protein coding genes"
        switch_to_db (cursor,  ensembl_db_name['homo_sapiens'])
        gene_list = get_gene_ids (cursor, biotype='protein_coding', is_known=1)
        
    incomplete = 0
    genes_checked = 0
    #for gene_id in gene_list: 
    #for gene_id in [743609]: 
    for sampling_count in range(1000):
 
        gene_id = choice(gene_list)
        genes_checked += 1
        with_map = 0
        tot      = 0
        switch_to_db (cursor, ensembl_db_name['homo_sapiens'])
        print  gene2stable(cursor, gene_id), get_description (cursor, gene_id)

        # find all exons we are tracking in the database
        human_exons = gene2exon_list(cursor, gene_id)
        human_exons.sort(key=lambda exon: exon.start_in_gene)
        has_a_map = False
        for human_exon in human_exons:
            if (not human_exon.is_canonical or  not human_exon.is_coding): continue
            if verbose:
                print  
                print "\t human",   human_exon.exon_id,  human_exon.is_known
                print "\t ", get_exon_pepseq(cursor, human_exon, ensembl_db_name['homo_sapiens'])
                print "\t checking maps ..."
            maps = get_maps(cursor, ensembl_db_name, human_exon.exon_id, human_exon.is_known)
            tot += 1
            if maps:
                has_a_map = True
                with_map += 1
                #print "ok"
            else:
                print"no maps for exon", human_exon.exon_id
                continue
            if verbose:
                for map in maps:
                    species            = map.species_2
                    exon               = map2exon(cursor, ensembl_db_name, map)
                    unaligned_sequence = get_exon_pepseq(cursor, exon, ensembl_db_name[species])
                    if ( map.similarity):
                        print "\t", species,  map.source, map.exon_id_2, map.exon_known_2
                        print "\tmaps to ",  map.exon_id_1, map.exon_known_1
                        print "\tsim",  map.similarity,
                        print "\tsource",  map.source
                        print "\t", unaligned_sequence
                        if not map.bitmap:
                            print "\t bitmap not assigned"
                        else:
                            bs = Bits(bytes=map.bitmap)
                            reconst_pepseq = ''
                            if (not bs.count(1) == len(unaligned_sequence)): 
                                print "\talnd seq mismatch"
                            
                            else:
                                usi = iter(unaligned_sequence)
                                for c in bs.bin:
                                    if c == '0': reconst_pepseq += '-'
                                    else:        reconst_pepseq += next(usi)
                                print "\tbinary   : ", bs.bin
                                print "\talnd seq: ", reconst_pepseq
                        print
        if not tot== with_map:
            print "####  gene id: %d   total exons: %d     with map:  %d   ( = %d%%) " % \
                (gene_id,  tot,  with_map, int(float(with_map)/tot*100) )
            incomplete += 1

    print "genes checked: %d,  incomplete: %d"  %  (genes_checked, incomplete)
    cursor.close()
    db.close()

    print tot, with_map
Example #26
def go_to_while():
    global UDP_IP_ADDRESS, UDP_PORT_NO, strt, clo, file_path
    if (strt == 1):
        try:
            serverSock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            serverSock.bind((UDP_IP_ADDRESS, UDP_PORT_NO))
        except:
            print('The requested address is not valid in its context')
            print(str(UDP_IP_ADDRESS) +  ' or ' + str(UDP_PORT_NO) + ' is not valid')
            print('Please try again.....')
            master.destroy()
            return
        one_t=0
        csv_data=[]
        
        while True:
        
            if (clo == 0):
                break
            
            data_m, addr = serverSock.recvfrom(UDP_PORT_NO)    #port no configured in webpage
            for i in range(len(data_m)):
                re = data_m[i]
                arr.append(re)
                
            header=(arr[0:3])
            print((header[0:3]))
            csv_data.append(header[0])
            csv_data.append(header[1])
            csv_data.append(header[2])
            checksum=extract_val(arr,3,2)
            header_dict['checksum']=checksum
            print('checksum:',checksum)
            csv_data.append(checksum)
            msg_len=extract_val(arr,5,2)
            print("messege length:",msg_len)
            csv_data.append(msg_len)
            week_no=extract_val(arr,7,2)
            print("week no:",week_no)
            csv_data.append(week_no)
            towc=extract_val(arr,9,4)
            print("towc:",towc)
            csv_data.append(towc)
            year=extract_val(arr,13,2)
            print("year:",year)
            csv_data.append(year)
            month=extract_val(arr,15,1)
            print("month:",month)
            csv_data.append(month)
            day=extract_val(arr,16,1)
            print("day:",day)
            csv_data.append(day)
            hour=extract_val(arr,17,1)
            print("hour:",hour)
            csv_data.append(hour)    
            minn=extract_val(arr,18,1)
            print("minn:",minn)
            csv_data.append(minn)
            sec=extract_val(arr,19,1)
            print("sec:",sec)
            csv_data.append(sec)
            mode_of_sys=extract_val(arr,20,1)
            print("mode_of_sys:",mode_of_sys)
            csv_data.append(mode_of_sys)
            if((arr[21]&0x07)==0):
                print("Time reference:internal GNSS rx")
            elif((arr[21]&0x07)==1):
                print("Time reference:External GNSS rx")
            elif((arr[21]&0x07)==2):
                print("Time reference:NTP server")
            elif((arr[21]&0x07)==3):    
                print("Time reference:manual")
            elif((arr[21]&0x07)==4):
              print("Time reference:TWSTFT master")
            time_ref=(arr[21]&0x07)
            csv_data.append(time_ref) 
            osc_lock=(arr[21]>>3 & 0x01)
            if(osc_lock==0):
                print("osc_lock:not locked")
            elif(osc_lock==1):
                print("osc_lock:locked")
            csv_data.append(osc_lock)  
            clk_src=(arr[21]>>4 & 0x01)
            if(clk_src==0):
                print("clk_src:internal")
            elif(clk_src==1):
                print("clk_src:external")
            csv_data.append(clk_src)    
            Time_init_status=(arr[21]>>5 & 0x01)
            if(Time_init_status==0):
                print("Time_init_status:not initialized")
            elif(Time_init_status==1):
                print("Time_init_status:initialized")
            csv_data.append(Time_init_status)    
            sync_status=(arr[21]>>6 & 0x03)
            if(sync_status==0):
                print("sync_status:sync")
            elif(sync_status==1):
                print("sync_status:no sync")
            elif(sync_status==2):
                print("sync_status:holdover")
            csv_data.append(sync_status)     
            station_pos_mode=(arr[22] & 0x01)
            if(station_pos_mode==0):
                print("station_pos_mode:Manual")
            elif(station_pos_mode==1):
                print("station_pos_mode:External GNSS RX")
            csv_data.append(station_pos_mode)     
            optical_port_output=(arr[22]>>1 & 0x03)
            if(optical_port_output==0):
                print("optical_port_output:1PPS")
            elif(optical_port_output==1):
                print("optical_port_output:10 MHz")
            elif(optical_port_output==2):
                print("optical_port_output:TOD(Time Of Day)")
            csv_data.append(optical_port_output)    
            TX_signal_pow=(arr[22]>>3 & 0x01)
            if(TX_signal_pow==0):
                print("TX_signal_pow:OFF")
            elif(TX_signal_pow==1):
                print("TX_signal_pow:ON")
            csv_data.append(TX_signal_pow)     
            Local_station_ID=extract_val(arr,25,1)
            print("Local_station_ID:",Local_station_ID)
            csv_data.append(Local_station_ID)
            LS_latitude= extract_float(arr,26,4,1)
            print("LS_latitude:",LS_latitude)
            csv_data.append(LS_latitude)
            LS_longitude= extract_float(arr,30,4,1)
            print("LS_longitude:",LS_longitude)
            csv_data.append(LS_longitude)
            LS_altitude= extract_float(arr,34,4,1)
            print("LS_altitude:",LS_altitude)
            csv_data.append(LS_altitude)
            SA_latitude= extract_float(arr,38,4,1)
            print("SA_latitude:",SA_latitude)
            csv_data.append(SA_latitude)
            SA_longitude= extract_float(arr,42,4,1)
            print("SA_longitude:",SA_longitude)
            csv_data.append(SA_longitude)
            SA_altitude= extract_float(arr,46,4,1)
            print("SA_altitude:",SA_altitude)
            csv_data.append(SA_altitude)
            Tx_frequency=extract_float(arr,50,4,1)
            print("Tx_frequency:",(Tx_frequency))
            csv_data.append(Tx_frequency) 
            
            ########################################################################
            Rx_frequency=extract_float(arr,54,4,1)
            print("Rx_frequency:",(Rx_frequency))
            csv_data.append(Rx_frequency) 
            
            TX_power=extract_val(arr,58,4)  #signed
            # reinterpret the 32-bit word as a signed value
            TX_power = Bits(uint=TX_power, length=32).int
            print("TX_power:",(TX_power))
            csv_data.append(TX_power)
            
            Time_offset=extract_float(arr,62,8,2)
            print("Time_offset:",Time_offset)
            csv_data.append(Time_offset)  
            
            Freq_offset=extract_float(arr,70,8,2)
            print("Freq_offset:",Freq_offset)
            csv_data.append(Freq_offset)
            
            Osc_control_voltage=extract_val(arr,78,4)
            print("Osc_control_voltage:",Osc_control_voltage)
            csv_data.append(Osc_control_voltage)  
            
            TSPO=extract_val(arr,82,4)
            print("TSPO:",TSPO)
            csv_data.append(TSPO) 
            
            NMEA_baud_rate=extract_val(arr,86,4)
            print("NMEA_baud_rate:",NMEA_baud_rate)
            csv_data.append(NMEA_baud_rate) 
            
            RS422_1PPS_delay=extract_val(arr,90,2)
            print("RS422_1PPS_delay:",RS422_1PPS_delay)
            csv_data.append(RS422_1PPS_delay) 
            
            RS422_10MHz_delay=extract_val(arr,92,2)
            print("RS422_10MHz_delay:",RS422_10MHz_delay)
            csv_data.append(RS422_10MHz_delay)
            
            Offset_wrt_master=extract_val(arr,94,4)
            # reinterpret the 32-bit word as a signed value
            Offset_wrt_master = Bits(uint=Offset_wrt_master, length=32).int
            print("Offset_wrt_master:",(Offset_wrt_master))
            csv_data.append(Offset_wrt_master)
            
            Fault_id=extract_val(arr,98,4)
            print("Fault_id:",Fault_id)
            csv_data.append(Fault_id)
            
        
            
            
            
            
        #    TX_power=extract_val(arr,54,4)  #signed
        #    binary = bin(TX_power)
        #    bits = Bits(bin=binary)
        #    TX_power = bits.int
        #    print("TX_power:",(TX_power))
        #    csv_data.append(TX_power)    
        #    Time_offset=extract_float(arr,58,8,2)
        #    print("Time_offset:",Time_offset)
        #    csv_data.append(Time_offset)  
        #    Freq_offset=extract_float(arr,66,8,2)
        #    print("Freq_offset:",Freq_offset)
        #    csv_data.append(Freq_offset)   
        #    Osc_control_voltage=extract_val(arr,74,4)
        #    print("Osc_control_voltage:",Osc_control_voltage)
        #    csv_data.append(Osc_control_voltage)   
        #    TSPO=extract_val(arr,78,4)
        #    print("TSPO:",TSPO)
        #    csv_data.append(TSPO)  
        #    NMEA_baud_rate=extract_val(arr,82,4)
        #    print("NMEA_baud_rate:",NMEA_baud_rate)
        #    csv_data.append(NMEA_baud_rate)  
        #    RS422_1PPS_delay=extract_val(arr,86,2)
        #    print("RS422_1PPS_delay:",RS422_1PPS_delay)
        #    csv_data.append(RS422_1PPS_delay) 
        #    RS422_10MHz_delay=extract_val(arr,88,2)
        #    print("RS422_10MHz_delay:",RS422_10MHz_delay)
        #    csv_data.append(RS422_10MHz_delay)
        #    Offset_wrt_master=extract_val(arr,90,4)
        #    binary = bin(Offset_wrt_master)
        #    bits = Bits(bin=binary)
        #    Offset_wrt_master = bits.int
        #    print("Offset_wrt_master:",(Offset_wrt_master))
        #    csv_data.append(Offset_wrt_master)
        #    Fault_id=extract_val(arr,94,4)
        #    print("Fault_id:",Fault_id)
        #    csv_data.append(Fault_id)
        #    
            ####################################channel data 1#####################################
             ##channel 1 data
            PRN_channel1=extract_val(arr,126,1)
            print("PRN_channel1:",PRN_channel1)
            csv_data.append(PRN_channel1)
            
            Channel_Track_Status=extract_val(arr,127,1)
            if(Channel_Track_Status==0):
                print("Channel_Track_Status:IDLE")
            elif(Channel_Track_Status==1):
                print("Channel_Track_Status:Acquisition")        
            elif(Channel_Track_Status==2):
                print("Channel_Track_Status:FLL")  
            elif(Channel_Track_Status==3):
                print("Channel_Track_Status:PLL")       
            elif(Channel_Track_Status==4):
                print("Channel_Track_Status:Reacquisition")  
            csv_data.append(Channel_Track_Status) 
            
            Doppler=extract_float(arr,128,4,1)
            print("Doppler:",Doppler)    
            csv_data.append(Doppler)
            
            Lock_Count= extract_val(arr,132,4)
            print("Lock_Count:",Lock_Count) 
            csv_data.append(Lock_Count)
            
            C_N0=extract_float(arr,136,4,1)
            print("C_N0:",(C_N0)) 
            csv_data.append(C_N0)
            
            Decryption_status=extract_val(arr,140,1)
            print("Decryption_status:",Decryption_status)
            csv_data.append(Decryption_status)
            Authentication_status=extract_val(arr,141,1)
            print("Authentication_status:",Authentication_status)
            csv_data.append(Authentication_status)
            
            LS_time_offset=extract_float(arr,142,8,2)  #it was One_way_offset before
            print("One_way_offset:",LS_time_offset)
            csv_data.append(LS_time_offset)
            RS_time_offset=extract_float(arr,150,8,2)
            print("RS_time_offset:",RS_time_offset)  
            csv_data.append(RS_time_offset)
            
            time_offset=extract_float(arr,158,8,2)
            print("time_offset:",time_offset)  
            csv_data.append(time_offset)
            
            Freq_offset=extract_float(arr,166,8,2)
            print("Freq_offset:",Freq_offset)  
            csv_data.append(Freq_offset)
            
            RS_tx_power=extract_float(arr,174,4,1)
            print("RS_tx_power:",RS_tx_power) 
            csv_data.append(RS_tx_power) 
            
            RS_C_N0=extract_float(arr,178,4,1)
            print("RS_C_N0:",RS_C_N0) 
            csv_data.append(RS_C_N0)
            
            Year1=extract_val(arr,182,2)
            Month1=extract_val(arr,184,1)
            Day1=extract_val(arr,185,1)
            Hour1=extract_val(arr,186,1)
            Min1=extract_val(arr,187,1)
            Sec1=extract_val(arr,188,1)
            print("Last_sync_time:%s.%s.%s.%s/%s/%s"%(Hour1,Min1,Sec1,Day1,Month1,Year1))
            csv_data.append(Year1)
            csv_data.append(Month1)
            csv_data.append(Day1)
            csv_data.append(Hour1) 
            csv_data.append(Min1)
            csv_data.append(Sec1) 
            
            RS_latitude1=extract_float(arr,189,4,1)
            print("RS_latitude1:",RS_latitude1)    
            csv_data.append(RS_latitude1)
            RS_longitude1=extract_float(arr,193,4,1)
            print("RS_longitude1:",RS_longitude1) 
            csv_data.append(RS_longitude1)
            RS_altitude1=extract_float(arr,197,4,1)
            print("RS_altitude1:",RS_altitude1) 
            csv_data.append(RS_altitude1)
        ####################################channel data 1#####################################
            
            
            
            
            
        #    ##channel 1 data
        #    PRN_channel1=extract_val(arr,122,1)
        #    print("PRN_channel1:",PRN_channel1)
        #    csv_data.append(PRN_channel1)
        #    Channel_Track_Status=extract_val(arr,123,1)
        #    if(Channel_Track_Status==0):
        #        print("Channel_Track_Status:IDLE")
        #    elif(Channel_Track_Status==1):
        #        print("Channel_Track_Status:Acquisition")        
        #    elif(Channel_Track_Status==2):
        #        print("Channel_Track_Status:FLL")  
        #    elif(Channel_Track_Status==3):
        #        print("Channel_Track_Status:PLL")       
        #    elif(Channel_Track_Status==4):
        #        print("Channel_Track_Status:Reacquisition")  
        #    csv_data.append(Channel_Track_Status)     
        #    Doppler=extract_float(arr,124,4,1)
        #    print("Doppler:",Doppler)    
        #    csv_data.append(Doppler)
        #    Lock_Count= extract_val(arr,128,4)
        #    print("Lock_Count:",Lock_Count) 
        #    csv_data.append(Lock_Count)
        #    C_N0=extract_float(arr,132,4,1)
        #    print("C_N0:",(C_N0)) 
        #    csv_data.append(C_N0)
        #    Decryption_status=extract_val(arr,136,1)
        #    print("Decryption_status:",Decryption_status)
        #    csv_data.append(Decryption_status)
        #    Authentication_status=extract_val(arr,137,1)
        #    print("Authentication_status:",Authentication_status)
        #    csv_data.append(Authentication_status)
        #    One_way_offset=extract_float(arr,138,8,2)
        #    print("One_way_offset:",One_way_offset)
        #    csv_data.append(One_way_offset)
        #    RS_time_offset=extract_float(arr,146,8,2)
        #    print("RS_time_offset:",RS_time_offset)  
        #    csv_data.append(RS_time_offset)
        #    Freq_offset=extract_float(arr,154,8,2)
        #    print("Freq_offset:",Freq_offset)  
        #    csv_data.append(Freq_offset)
        #    RS_tx_power=extract_float(arr,162,4,1)
        #    print("RS_tx_power:",RS_tx_power) 
        #    csv_data.append(RS_tx_power) 
        #    RS_C_N0=extract_float(arr,166,4,1)
        #    print("RS_C_N0:",RS_C_N0) 
        #    csv_data.append(RS_C_N0)
        #    Year1=extract_val(arr,170,2)
        #    Month1=extract_val(arr,172,1)
        #    Day1=extract_val(arr,173,1)
        #    Hour1=extract_val(arr,174,1)
        #    Min1=extract_val(arr,175,1)
        #    Sec1=extract_val(arr,176,1)
        #    print("Last_sync_time:%s.%s.%s.%s/%s/%s"%(Hour1,Min1,Sec1,Day1,Month1,Year1))
        #    csv_data.append(Year1)
        #    csv_data.append(Month1)
        #    csv_data.append(Day1)
        #    csv_data.append(Hour1) 
        #    csv_data.append(Min1)
        #    csv_data.append(Sec1)  
        #    RS_latitude1=extract_float(arr,177,4,1)
        #    print("RS_latitude1:",RS_latitude1)    
        #    csv_data.append(RS_latitude1)
        #    RS_longitude1=extract_float(arr,181,4,1)
        #    print("RS_longitude1:",RS_longitude1) 
        #    csv_data.append(RS_longitude1)
        #    RS_altitude1=extract_float(arr,185,4,1)
        #    print("RS_altitude1:",RS_altitude1) 
        #    csv_data.append(RS_altitude1)
          
         ###############################channel 2 data######################################
            
            PRN_channel2=extract_val(arr,201,1)
            print("PRN_channel2:",PRN_channel2)
            csv_data.append(PRN_channel2)   
            Channel_Track_Status2=extract_val(arr,202,1)
            if(Channel_Track_Status2==0):
                print("Channel_Track_Status:IDLE")
            elif(Channel_Track_Status2==1):
                print("Channel_Track_Status:Acquisition")        
            elif(Channel_Track_Status2==2):
                print("Channel_Track_Status:FLL")  
            elif(Channel_Track_Status2==3):
                print("Channel_Track_Status:PLL")       
            elif(Channel_Track_Status2==4):
                print("Channel_Track_Status:Reacquisition") 
            csv_data.append(Channel_Track_Status2)     
            Doppler2=extract_float(arr,203,4,1)
            print("Doppler2:",Doppler2) 
            csv_data.append(Doppler2)
            Lock_Count2= extract_val(arr,207,4)
            print("Lock_Count2:",Lock_Count2) 
            csv_data.append(Lock_Count2)
            C_N02=extract_float(arr,211,4,1)
            print("C_N02:",C_N02) 
            csv_data.append(C_N02)  
            Decryption_status2=extract_val(arr,215,1)
            print("Decryption_status2:",Decryption_status2)
            csv_data.append(Decryption_status2)   
            Authentication_status2=extract_val(arr,216,1)
            print("Authentication_status2:",Authentication_status2)
            csv_data.append(Authentication_status2)  
         
            LS_time_offset2=extract_float(arr,217,8,2)
            print("LS_time_offset2:",LS_time_offset2)
            csv_data.append(LS_time_offset2)
            
            RS_time_offset2=extract_float(arr,225,8,2)
            print("RS_time_offset2:",RS_time_offset2)  
            csv_data.append(RS_time_offset2)
             
            time_offset2=extract_float(arr,233,8,2)
            print("time_offset2:",time_offset2)  
            csv_data.append(time_offset2)
         
            Freq_offset2=extract_float(arr,241,8,2)
            print("Freq_offset2:",Freq_offset2)  
            csv_data.append(Freq_offset2)
            RS_tx_power2=extract_float(arr,249,4,1)
            print("RS_tx_power2:",RS_tx_power2) 
            csv_data.append(RS_tx_power2)
            
            RS_C_N02=extract_float(arr,253,4,1)
            print("RS_C_N02:",RS_C_N02) 
            csv_data.append(RS_C_N02)
            Year2=extract_val(arr,257,2)
            Month2=extract_val(arr,259,1)
            Day2=extract_val(arr,260,1)
            Hour2=extract_val(arr,261,1)
            Min2=extract_val(arr,262,1)
            Sec2=extract_val(arr,263,1)
            print("Last_sync_time:%s.%s.%s.%s/%s/%s"%(Hour2,Min2,Sec2,Day2,Month2,Year2))
            csv_data.append(Year2)   
            csv_data.append(Month2) 
            csv_data.append(Day2) 
            csv_data.append(Hour2) 
            csv_data.append(Min2) 
            csv_data.append(Sec2) 
          
            RS_latitude2=extract_float(arr,264,4,1)
            print("RS_latitude2:",RS_latitude2)    
            csv_data.append(RS_latitude2)  
            RS_longitude2=extract_float(arr,268,4,1)
            print("RS_longitude2:",RS_longitude2) 
            csv_data.append(RS_longitude2)  
            RS_altitude2=extract_float(arr,272,4,1)
            print("RS_altitude2:",RS_altitude2) 
            csv_data.append(RS_altitude2)   
            print("\n")
            
            
            
        #    PRN_channel2=extract_val(arr,189,1)
        #    print("PRN_channel2:",PRN_channel2)
        #    csv_data.append(PRN_channel2)   
        #    Channel_Track_Status2=extract_val(arr,190,1)
        #    if(Channel_Track_Status2==0):
        #        print("Channel_Track_Status:IDLE")
        #    elif(Channel_Track_Status2==1):
        #        print("Channel_Track_Status:Acquisition")        
        #    elif(Channel_Track_Status2==2):
        #        print("Channel_Track_Status:FLL")  
        #    elif(Channel_Track_Status2==3):
        #        print("Channel_Track_Status:PLL")       
        #    elif(Channel_Track_Status2==4):
        #        print("Channel_Track_Status:Reacquisition") 
        #    csv_data.append(Channel_Track_Status2)     
        #    Doppler2=extract_float(arr,191,4,1)
        #    print("Doppler:",Doppler2) 
        #    csv_data.append(Doppler2)
        #    Lock_Count2= extract_val(arr,195,4)
        #    print("Lock_Count:",Lock_Count2) 
        #    csv_data.append(Lock_Count2)
        #    C_N02=extract_float(arr,199,4,1)
        #    print("C_N0:",C_N02) 
        #    csv_data.append(C_N02)  
        #    Decryption_status2=extract_val(arr,203,1)
        #    print("Decryption_status:",Decryption_status2)
        #    csv_data.append(Decryption_status2)   
        #    Authentication_status2=extract_val(arr,204,1)
        #    print("Authentication_status:",Authentication_status2)
        #    csv_data.append(Authentication_status2)  
        #    
        #    One_way_offset2=extract_float(arr,205,8,2)
        #    print("One_way_offset:",One_way_offset2)
        #    csv_data.append(One_way_offset2)
        #    
        #    RS_time_offset2=extract_float(arr,213,8,2)
        #    print("RS_time_offset:",RS_time_offset2)  
        #    csv_data.append(RS_time_offset2)
        #    Freq_offset2=extract_float(arr,221,8,2)
        #    print("Freq_offset:",Freq_offset2)  
        #    csv_data.append(Freq_offset2)
        #    RS_tx_power2=extract_float(arr,229,4,1)
        #    print("RS_tx_power:",RS_tx_power2) 
        #    csv_data.append(RS_tx_power2)
        #    RS_C_N02=extract_float(arr,233,4,1)
        #    print("RS_C_N0:",RS_C_N02) 
        #    csv_data.append(RS_C_N02)
        #    Year2=extract_val(arr,237,2)
        #    Month2=extract_val(arr,239,1)
        #    Day2=extract_val(arr,240,1)
        #    Hour2=extract_val(arr,241,1)
        #    Min2=extract_val(arr,242,1)
        #    Sec2=extract_val(arr,243,1)
        #    print("Last_sync_time:%s.%s.%s.%s/%s/%s"%(Hour2,Min2,Sec2,Day2,Month2,Year2))
        #    csv_data.append(Year2)   
        #    csv_data.append(Month2) 
        #    csv_data.append(Day2) 
        #    csv_data.append(Hour2) 
        #    csv_data.append(Min2) 
        #    csv_data.append(Sec2) 
        #  
        #    RS_latitude2=extract_float(arr,244,4,1)
        #    print("RS_latitude2:",RS_latitude2)    
        #    csv_data.append(RS_latitude2)  
        #    RS_longitude2=extract_float(arr,248,4,1)
        #    print("RS_longitude2:",RS_longitude2) 
        #    csv_data.append(RS_longitude2)  
        #    RS_altitude2=extract_float(arr,252,4,1)
        #    print("RS_altitude2:",RS_altitude2) 
        #    csv_data.append(RS_altitude2)   
        #    print("\n")
             ###############################channel 2 data######################################
            time_sec = time.time()
            result = time.localtime(time_sec)
            hhh = result.tm_hour
            mmm = result.tm_min
            sss = result.tm_sec
            final_time = hhh*60*60 + mmm*60 + sss
            #################################CSV File handling############################################
#            file_name = file_path + 'TWSTFT_' + str(final_time) + '_' + str(UDP_PORT_NO)  + '.csv'
            if(one_t==0):
                file_name = file_path + 'TWSTFT_log_' + str(final_time) + '_' + str(UDP_PORT_NO)  + '.csv'
                with open(file_name, 'w+',newline='') as csvFile:
                    writer = csv.writer(csvFile)
                    writer.writerow(row)  
                    writer.writerow(csv_data)
                    one_t=1
                     
            else:
                 with open(file_name, 'a+',newline='') as csvFile:
                     writer = csv.writer(csvFile)
                     writer.writerow(csv_data) 
            
            #############################################################################
            csv_data.clear()
        
            arr.clear()
    else:
        pass
Example #27
def multiple_exon_alnmt(gene_list, db_info):


    print "process pid: %d, length of gene list: %d" % ( get_process_id(), len(gene_list))

    [local_db, ensembl_db_name] = db_info

    db     = connect_to_mysql()
    cfg    = ConfigurationReader()
    acg    = AlignmentCommandGenerator()
    cursor = db.cursor()

    # find db ids and common names for each species db
    [all_species, ensembl_db_name] = get_species (cursor)
    

    species  = 'homo_sapiens'
    switch_to_db (cursor,  ensembl_db_name[species])
    gene_ids = get_gene_ids (cursor, biotype='protein_coding', is_known=1)

    # for each human gene
    gene_ct = 0
    tot  = 0
    ok   = 0
    no_maps        = 0
    no_pepseq      = 0
    no_orthologues = 0
    min_similarity = cfg.get_value('min_accptbl_exon_sim')

    #gene_list.reverse()
    for gene_id in gene_list:

        start = time()
        gene_ct += 1
        if  not gene_ct%10: print gene_ct, "genes out of", len(gene_list)

        switch_to_db (cursor, ensembl_db_name['homo_sapiens'])
        print gene_ct, len(gene_ids),  gene_id,  gene2stable(cursor, gene_id), get_description (cursor, gene_id)

        human_exons = filter (lambda e: e.is_known==1 and e.is_coding and e.covering_exon<0, gene2exon_list(cursor, gene_id))
        human_exons.sort(key=lambda exon: exon.start_in_gene)

        ##################################################################
        for human_exon in human_exons:
            
            tot += 1

            # find all orthologous exons the human exon  maps to
            maps = get_maps(cursor, ensembl_db_name, human_exon.exon_id, human_exon.is_known)
            if verbose: 
                print "\texon no.", tot, " id", human_exon.exon_id,
                if not maps: 
                    print " no maps"
                    print human_exon
                print 
            if not maps: 
                no_maps += 1
                continue

  
            # human sequence to fasta:
            seqname   = "{0}:{1}:{2}".format('homo_sapiens', human_exon.exon_id, human_exon.is_known)
            switch_to_db (cursor, ensembl_db_name['homo_sapiens'])
            [exon_seq_id, pepseq, pepseq_transl_start, pepseq_transl_end, 
             left_flank, right_flank, dna_seq] = get_exon_seqs (cursor, human_exon.exon_id, human_exon.is_known)
            if (not pepseq):
                if verbose and  human_exon.is_coding and  human_exon.covering_exon <0: # this should be a master exon
                    print "no pep seq for",  human_exon.exon_id, "coding ", human_exon.is_coding,
                    print "canonical: ",  human_exon.is_canonical
                    print "length of dna ", len(dna_seq)
                no_pepseq += 1
                continue

            # collect seq from all maps, and output them in fasta format
            hassw = False
            headers   = []
            sequences = {}
            exons_per_species = {}

            for map in maps:

                switch_to_db (cursor, ensembl_db_name[map.species_2])
                if map.similarity < min_similarity: continue
                exon    = map2exon(cursor, ensembl_db_name, map)
                pepseq  = get_exon_pepseq (cursor,exon)
                if (not pepseq):
                    continue
                if  map.source == 'sw_sharp':
                    exon_known_code = 2
                    hassw = True
                elif  map.source == 'usearch':
                    exon_known_code = 3
                    hassw = True
                else:
                    exon_known_code = map.exon_known_2
                seqname = "{0}:{1}:{2}".format(map.species_2, map.exon_id_2, exon_known_code)
                headers.append(seqname)
                sequences[seqname] = pepseq
                # for split exon concatenation (see below)
                if not map.species_2 in exons_per_species.keys():
                    exons_per_species[map.species_2] = []
                exons_per_species[map.species_2].append ([ map.exon_id_2, exon_known_code]);
                
                    
            if (len(headers) <=1 ):
                if verbose: print "single species in the alignment"
                no_orthologues += 1
                continue
            
            # concatenate exons from the same gene - the alignment program might go wrong otherwise
            concatenated = concatenate_exons (cursor, ensembl_db_name, sequences, exons_per_species)

            fasta_fnm = "{0}/{1}.fa".format( cfg.dir_path['scratch'], human_exon.exon_id)
            output_fasta (fasta_fnm, sequences.keys(), sequences)

            # align
            afa_fnm  = "{0}/{1}.afa".format( cfg.dir_path['scratch'], human_exon.exon_id)
            mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm)
            ret      = commands.getoutput(mafftcmd)

            if (verbose): print 'almt to', afa_fnm

            # read in the alignment 
            inf = erropen(afa_fnm, "r")
            aligned_seqs = {}
            for record in SeqIO.parse(inf, "fasta"):
                aligned_seqs[record.id] = str(record.seq)
            inf.close()
            # split back the concatenated exons
            if concatenated: split_concatenated_exons (aligned_seqs, concatenated)

            human_seq_seen = False
            for seq_name, sequence in aligned_seqs.iteritems():
                # if this is one of the concatenated seqs, split them back to two

                ### store the alignment as bitstring
                # Generate the bitmap
                bs         = Bits(bin='0b' + re.sub("[^0]","1", sequence.replace('-','0')))
                # The returned value of tobytes() will be padded at the end 
                # with between zero and seven 0 bits to make it byte aligned.
                # I will end up with something that looks like extra alignment gaps, that I'll have to return
                msa_bitmap = bs.tobytes() 
                # Retrieve information on the cognate
                cognate_species, cognate_exon_id, cognate_exon_known = seq_name.split(':')
                if cognate_exon_known == '2':
                    source = 'sw_sharp'
                elif cognate_exon_known == '3':
                    source = 'usearch'
                else:
                    source = 'ensembl'
                if (cognate_species == 'homo_sapiens'):
                    human_seq_seen = True
                cognate_genome_db_id = species2genome_db_id(cursor, cognate_species) # moves the cursor
                switch_to_db(cursor, ensembl_db_name['homo_sapiens']) # so move it back to homo sapiens
                # Write the bitmap to the database
                #if (cognate_species == 'homo_sapiens'):
                if verbose: # and (source=='sw_sharp' or source=='usearch'):
                    print "storing"
                    print human_exon.exon_id, human_exon.is_known
                    print cognate_species, cognate_genome_db_id, cognate_exon_id, cognate_exon_known, source
                    print sequence
                # an empty bitmap must be skipped whether or not we are verbose
                if not msa_bitmap:
                    if verbose: print "no msa_bitmap"
                    continue
                store_or_update(cursor, "exon_map",    {"cognate_genome_db_id":cognate_genome_db_id,
                   "cognate_exon_id":cognate_exon_id   ,"cognate_exon_known"  :cognate_exon_known,
                   "source": source, "exon_id" :human_exon.exon_id, "exon_known":human_exon.is_known},
                  {"msa_bitstring":MySQLdb.escape_string(msa_bitmap)})
                 
            ok += 1
            commands.getoutput("rm "+afa_fnm+" "+fasta_fnm)

        if verbose: print " time: %8.3f\n" % (time()-start);

    print "tot: ", tot, "ok: ", ok
    print "no maps ",   no_pepseq
    print "no pepseq ", no_pepseq
    print "no orthologues  ", no_orthologues
    print
Exemple #28
0
def gprMax_to_dzt(filename, rx, rxcomponent, centerFreq, distTx_Rx,
                  trace_step):

    import h5py as h5
    import os
    import sys
    import struct
    import bitstruct
    import datetime
    import numpy as np  # used throughout below but missing from the original imports
    from bitstring import Bits
    from scipy import signal

    # ------------------------------- Information specified by the user ---------------------------------------

    # Specify gprMax file path name
    file_path_name = filename

    # Specify center frequency (MHz)
    center_freq = centerFreq

    # Specify Tx-Rx distance
    distance = distTx_Rx

    # Trace step
    trace_step = trace_step

    # Choose E-field component
    comp = rxcomponent

    # ---------------------------------------------------------------------------------------------------------

    # Read gprMax data

    bscan, _, _ = gprMax_Bscan(filename + '.out', rx, rxcomponent)
    data = np.array(bscan)

    # Read time step
    #file = h5.File(filename[0:-4]+'1.out', 'r')
    file = h5.File(filename + '1.out', 'r')
    time_step = file.attrs['dt']
    file.close()

    data = (data * 32767) / np.max(np.abs(data))
    data[data > 32767] = 32767
    data[data < -32768] = -32768
    data = np.round(data)

    # Number of samples and traces
    [noSamples, noTraces] = np.shape(data)

    # Convert time step to ns
    time_step = time_step * 10**9

    # Sampling frequency (MHz)
    sampling_freq = (1 / time_step) * 10**3

    # Time window (ns)
    time_window = time_step * noSamples

    # DZT file name
    fileName = filename

    # Resample data to 1024 samples

    data = signal.resample(data, 1024)
    time_step = time_window / np.shape(data)[0]
    sampling_freq = (1 / time_step) * 10**3

    # ------------------------------------------------ DZT file header -----------------------------------------------------

    tag = 255  # 0x00ff if header, 0xfnff for old file Header
    dataOffset = 1024  # Constant 1024
    noSamples = np.shape(data)[0]  # Number of samples
    bits = 16  # Bits per data word (8 or 16)
    binaryOffset = 32768  # Binary offset (8 bit -> 128, 16 bit -> 32768)
    sps = 0  # Scans per second
    spm = 1 / trace_step  # Scans per metre
    mpm = 0  # Meters per mark
    position = 0  # Position (ns)
    time_window = time_window  # Time window (ns)
    noScans = 0  # Number of passes for 2D files

    dateTime = datetime.datetime.now()  # Current datetime

    # Date and time created
    createdSec = dateTime.second
    if createdSec > 29: createdSec = 29
    createdMin = dateTime.minute
    createdHour = dateTime.hour
    createdDay = dateTime.day
    createdMonth = dateTime.month
    createdYear = dateTime.year - 1980

    # Date and time modified
    modifiedSec = dateTime.second
    if modifiedSec > 29: modifiedSec = 29
    modifiedMin = dateTime.minute
    modifiedHour = dateTime.hour
    modifiedDay = dateTime.day
    modifiedMonth = dateTime.month
    modifiedYear = dateTime.year - 1980

    offsetRG = 0  # Offset to range gain function
    sizeRG = 0  # Size of range gain function
    offsetText = 0  # Offset to text
    sizeText = 0  # Size of text
    offsetPH = 0  # Offset to processing history
    sizePH = 0  # Size of processing history
    noChannels = 1  # Number of channels
    epsr = 5  # Average dielectric constant
    topPosition = 0  # Top position (m)
    vel = (299792458 / np.sqrt(epsr)) * 10**-9
    range0 = vel * (time_window / 2)  # Range (meters)
    xStart = 0  # X start coordinate
    xFinish = noTraces * trace_step - trace_step  # X finish coordinate
    servoLevel = 0  # Gain servo level
    reserved = 0  # Reserved
    antConfig = 0  # Antenna Configuration
    setupConfig = 0  # Setup Configuration
    spp = 0  # Scans per pass
    noLine = 0  # Line number
    yStart = 0  # Y start coordinate
    yFinish = 0  # Y finish coordinate
    lineOrder = 0
    dataType = 2  # Data type

    antennaName = 'antName'
    if len(antennaName) > 14:
        antennaName = antennaName[0:14]
    elif len(antennaName) < 14:
        antennaName = antennaName.ljust(14)

    channelMask = 0  # Channel mask

    fName = fileName  # File name
    if len(fName) > 12:
        fName = fName[0:12]
    elif len(fName) < 12:
        fName = fName.ljust(12)

    checkSum = 0  # Check sum for header

    # -------------------------------------------------------------------------------------------------------------------

    # ----------------------------------------- Convert to bytes and write to file --------------------------------------

    # Open file to write

    with open(fileName + '.dzt', 'wb') as fid:

        # Write header

        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, tag)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, dataOffset)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, noSamples)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, bits)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('s16<', dataStruct, 0, binaryOffset)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, sps)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, spm)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, mpm)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, position)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, time_window)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, noScans)
        fid.write(dataStruct)

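        # Date fields pack into one 32-bit word (widths as used below):
        # year-1980:7 | month:4 | day:5 | hour:5 | min:6 | sec:5 -> 7+4+5+5+6+5 = 32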
        sec = Bits(uint=createdSec, length=5)
        min = Bits(uint=createdMin, length=6)
        hour = Bits(uint=createdHour, length=5)
        day = Bits(uint=createdDay, length=5)
        month = Bits(uint=createdMonth, length=4)
        year = Bits(uint=createdYear, length=7)
        b = Bits().join([year, month, day, hour, min, sec])
        createDate = b.tobytes()
        fid.write(bitstruct.pack('>r32<', createDate))

        sec = Bits(uint=modifiedSec, length=5)
        min = Bits(uint=modifiedMin, length=6)
        hour = Bits(uint=modifiedHour, length=5)
        day = Bits(uint=modifiedDay, length=5)
        month = Bits(uint=modifiedMonth, length=4)
        year = Bits(uint=modifiedYear, length=7)
        b = Bits().join([year, month, day, hour, min, sec])
        modifiedDate = b.tobytes()
        fid.write(bitstruct.pack('>r32<', modifiedDate))

        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, offsetRG)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, sizeRG)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, offsetText)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, sizeText)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, offsetPH)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, sizePH)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, noChannels)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, epsr)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, topPosition)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, range0)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, xStart)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, xFinish)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, servoLevel)
        fid.write(dataStruct)
        dataStruct = bytearray(3)
        bitstruct.pack_into('r24<', dataStruct, 0, reserved)
        fid.write(dataStruct)
        dataStruct = bytearray(1)
        bitstruct.pack_into('u8<', dataStruct, 0, antConfig)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, setupConfig)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, spp)
        fid.write(dataStruct)
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, noLine)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, yStart)
        fid.write(dataStruct)
        dataStruct = bytearray(4)
        bitstruct.pack_into('f32<', dataStruct, 0, yFinish)
        fid.write(dataStruct)
        dataStruct = bytearray(1)
        bitstruct.pack_into('u8<', dataStruct, 0, lineOrder)
        fid.write(dataStruct)
        dataStruct = bytearray(1)
        bitstruct.pack_into('r8<', dataStruct, 0, dataType)
        fid.write(dataStruct)
        fid.write(bitstruct.pack('t14<', antennaName))
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, channelMask)
        fid.write(dataStruct)
        fid.write(bitstruct.pack('t12<', fName))
        dataStruct = bytearray(2)
        bitstruct.pack_into('u16<', dataStruct, 0, checkSum)
        fid.write(dataStruct)

        # Move to 1024 to write data

        fid.seek(dataOffset)
        data = data + binaryOffset
        data = np.array(data, dtype='<H')
        fid.write(data.T.astype('<H').tobytes())

        # Close file

        fid.close()

    print('Dzt file has been written!')
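
# A minimal sketch of reading the 32-bit date field back out of a DZT header
# with bitstring (an assumption mirroring the packing above; it ignores the
# byte-order swap that bitstruct's '>r32<' applies on write):
from bitstring import ConstBitStream

def unpack_dzt_date(raw32):
    s = ConstBitStream(bytes=raw32)
    year, month, day, hour, minute, sec = s.readlist(
        'uint:7, uint:4, uint:5, uint:5, uint:6, uint:5')
    return (year + 1980, month, day, hour, minute, sec)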
Exemple #29
0
 def testFind(self):
     a = Bits('0xabcd')
     r = a.find('0xbc')
     self.assertEqual(r[0], 4)
     r = a.find('0x23462346246', bytealigned=True)
     self.assertFalse(r)
Exemple #30
0
 def _val_to_bits(conv, val, length):
     if val == -1:  # '==', not 'is': identity comparison with ints is unreliable
         return Bits(int=-1, length=length)
     return Bits(bytes=conv(val), length=length)
Exemple #31
0
'''
from __future__ import print_function

__author__ = 'Ryan Helinski, Sandia National Laboratories'

from bitstring import Bits

from bientropy.pybientropy import bin_deriv_k
from bientropy import bien, tbien

if __name__ == '__main__':
    # When this file is run as a script, the following tests are executed
    from bientropy.testvectors import BIENTROPY_2BITS, BIENTROPY_4BITS, \
        ORDERING_4BIT, BIENTROPY_8BITS, TBIENTROPY_8BITS

    assert bin_deriv_k(Bits('0b01010101'), 1) == Bits('0b1111111')
    assert bin_deriv_k(Bits('0b00010001'), 3) == Bits('0b11111')
    assert bin_deriv_k(Bits('0b00011111'), 6) == Bits('0b01')

    assert abs(bien(Bits('0b1011')) - 0.95) < 0.01

    assert abs(tbien(Bits('0b1001')) - 0.54) < 0.01

    for s, v in BIENTROPY_2BITS:
        assert abs(bien(s) - v) < 0.01

    for s, v in BIENTROPY_4BITS:
        assert abs(bien(s) - v) < 0.01

    for y in ORDERING_4BIT:
        print(' '.join([
Exemple #32
0
if operation == "encrypt":
    input_filename: str = prefix_filename(args.subfolder, args.input)
    output_filename: str = prefix_filename(args.subfolder, args.output)
    key: bytes = None if args.key is None else bytes(args.key,
                                                     encoding=DEFAULT_ENCODING)

    if input_filename is None:
        raise ValueError("Filename for input was not provided.")
    if output_filename is None:
        raise ValueError("Filename for output was not provided.")
    if key is None:
        key = DEFAULT_KEY

    message_bits = read_input_file(input_filename)
    try:
        ciphertext_bits = Bits(bin=message_bits)
    except CreationError:
        raise ValueError("Provided input was not a valid bitstring.")
    if ciphertext_bits == Bits():
        raise ValueError("Provided input was empty.")

    bits = Bits(bytes=Encryptor(key).encrypt_bytes(ciphertext_bits.bytes))
    print("Input encrypted.")

    write_output_file(output_filename, bits.bin)
    print("Ciphertext written to {}".format(output_filename))

elif operation == "decrypt":
    input_filename: str = prefix_filename(args.subfolder, args.input)
    output_filename: str = prefix_filename(args.subfolder, args.output)
    key: bytes = None if args.key is None else bytes(args.key,
Exemple #33
0
 def encode(self, value):
     '''
     :type value: ``str``
     :param value: value to encode
     '''
     return Bits(bytes=strToBytes(value))
Exemple #34
0
from bitstring import Bits

X_LOC = 0  #beginning of bits for locations of x's
O_LOC = 81  #beginning of bits for locations of o's
PREV_MOV = 162  #beginning of bits for previous move location
TURN = 171  #bit for player to move
X_VIC = 172  #beginning of bits for x field victories
O_VIC = 181  #beginning of bits for o field victories
X_W = 190  #bit for x won
O_W = 191  #bit for o won
#full bitstring length = 192

VICTORIES = (
    Bits(bin='0b111000000'),  #across top
    Bits(bin='0b000111000'),  #across middle
    Bits(bin='0b000000111'),  #across bottom
    Bits(bin='0b100100100'),  #down left
    Bits(bin='0b010010010'),  #down middle
    Bits(bin='0b001001001'),  #down right
    Bits(bin='0b100010001'),  #diagonal \
    Bits(bin='0b001010100'),  #diagonal /
)
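
# A hypothetical helper sketch: with a field encoded as 9 bits (1 = occupied
# by the player under test), the field is won when all bits of any victory
# mask are set. Assumes field_bits is a 9-bit Bits slice of the gamestate.
def field_won(field_bits):
    return any((field_bits & v) == v for v in VICTORIES)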


def possible_moves(gamestate):
    moves = []
    lastmove = 9
    for i, b in enumerate(
            gamestate[PREV_MOV:PREV_MOV +
                      9]):  #set lastmove to the location of the previous move
        if b:
Exemple #35
0
 def read(self):
     '''Reads 32 bits of the SPI bus for processing and stores as 32-bit bitstring.'''
     raw_spi = self.spi.transaction(Spibus.reading(4))
     self.data = Bits(bytes=raw_spi[0], length=32)
     self.checkErrors()
Exemple #36
0
def identity(gamestate):
    return Bits(bin=gamestate.bin)
Exemple #37
0
# Requires bitstring module
# $ pip install bitstring

import socket
from time import sleep
from bitstring import Bits

UDP_ADDR = "10.0.0.9"
UDP_PORT = 5040

s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

while True:
    for servo_value in range(150,600):
        bits = Bits('uint:16=' + str(servo_value))
        print bits.bin
        s.sendto(bits.tobytes(), (UDP_ADDR, UDP_PORT))
        sleep(0.1)
Exemple #38
0
 def huffman_encode(self):
     "encode given text/string with generated tree/code"
     self.string = BitArray()
     for a in self.text:
         self.string.append(Bits(bin=self.code[a]))
     return self.string
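
 # A matching decode sketch (assumption: self.code maps each symbol to a
 # prefix-free bit string, as generated by the tree above):
 def huffman_decode(self, bits):
     inverse = {v: k for k, v in self.code.items()}
     out, buf = [], ''
     for b in bits.bin:
         buf += b
         if buf in inverse:  # prefix-free: the first match is the symbol
             out.append(inverse[buf])
             buf = ''
     return ''.join(out)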
Exemple #39
0
 def length(self, l):
     self._length = Bits(uint=self._format_length(l), length=7)
Exemple #40
0
 def output_hoard_riddle_rows(self):
     hoard_riddle_output_file = open(self.hoard_riddle_output_file_name,
                                     'wb')
     hoard_riddle_string = ''.join(self.hoard_riddle_rows)
     Bits(bin=hoard_riddle_string).tofile(hoard_riddle_output_file)
Exemple #41
0
 def testDouble(self):
     a = array.array('d', [0.0, 1.0, 2.5])
     b = Bits(a)
     self.assertEqual(b.length, 192)
     c, d, e = b.unpack('3*floatne:64')
     self.assertEqual((c, d, e), (0.0, 1.0, 2.5))
Exemple #42
0
#!/usr/bin/env python3

import requests
import re
from os import path
from bitstring import Bits

targetUrl = 'https://jupiter.challenges.picoctf.org/static/95be9526e162185c741259a75dffa0ab/whitepages.txt'
targetFilename = 'whitepages.txt'

if not path.exists(targetFilename):
    with open(targetFilename, 'xb') as f:
        r = requests.get(targetUrl)
        f.write(r.content)

with open(targetFilename, 'r') as f:
    fileContent = f.read()

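# The file holds exactly two distinct (whitespace) characters; map them to
# 1 and 0 respectively and the text becomes a bitstring.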
uniqueChars = ''.join(sorted(set(fileContent)))
mapping = {ch: i for ch, i in zip(uniqueChars, reversed(range(2)))}
bits = []
for ch in fileContent:
    bits.append(mapping[ch])
message = Bits(bits).tobytes().decode()
flag = re.search('picoCTF{.*?}', message)[0]
print(f"Flag: {flag}")
def multiple_exon_alnmt(species_list, db_info):


    [local_db, ensembl_db_name] = db_info

    verbose  = False

    db     = connect_to_mysql()
    cfg    = ConfigurationReader()
    acg    = AlignmentCommandGenerator()
    cursor = db.cursor()


    for species in species_list:

        print
        print "############################"
        print  species

        switch_to_db (cursor,  ensembl_db_name[species])
        gene_ids = get_gene_ids (cursor, biotype='protein_coding')
        #gene_ids = get_theme_ids(cursor, cfg, 'wnt_pathway')
        if not gene_ids:
            print "no gene_ids"
            continue


        gene_ct       = 0
        tot           = 0
        ok            = 0
        no_maps       = 0
        no_pepseq     = 0
        no_paralogues = 0
        for gene_id in gene_ids:

            if verbose: start = time()
            gene_ct += 1
            if not gene_ct%100: print species, gene_ct, "genes out of", len(gene_ids)
            if verbose: 
                print
                print gene_id, gene2stable(cursor, gene_id), get_description (cursor, gene_id)

            # get the paralogues - only the representative for  the family will have this 
            paralogues = get_paras (cursor, gene_id)  
            if not paralogues:
                if verbose:  print "\t not a template or no paralogues"
                continue

            if verbose:  print "paralogues: ", paralogues

            # get _all_ exons
            template_exons = gene2exon_list(cursor, gene_id)
            if (not template_exons):
                if verbose: print 'no exons for ', gene_id
                continue

            # find all template  exons we are tracking in the database
            for template_exon in template_exons:

                if verbose: print template_exon.exon_id
                maps = get_maps(cursor, ensembl_db_name, template_exon.exon_id,
                                template_exon.is_known, species=species, table='para_exon_map')

                if not maps:
                    no_maps += 1
                    continue

                # output to fasta:
                seqname        = "{0}:{1}:{2}".format('template', template_exon.exon_id, template_exon.is_known)
                exon_seqs_info =  get_exon_seqs (cursor, template_exon.exon_id, template_exon.is_known)
                if not exon_seqs_info: continue
                [exon_seq_id, pepseq, pepseq_transl_start, pepseq_transl_end, 
                 left_flank, right_flank, dna_seq] = exon_seqs_info
                if (not pepseq):
                    if ( template_exon.is_coding and  template_exon.covering_exon <0): # this should be a master exon
                        print "no pep seq for",  template_exon.exon_id, "coding ", template_exon.is_coding,
                        print "canonical: ",  template_exon.is_canonical
                        print "length of dna ", len(dna_seq)
                        no_pepseq += 1
                    continue
                
                tot += 1

                sequences = {seqname:pepseq}
                headers   = [seqname]
                for map in maps:
                    exon    = map2exon(cursor, ensembl_db_name, map, paralogue=True)
                    pepseq  = get_exon_pepseq (cursor,exon)
                    if (not pepseq):
                        continue
                    seqname = "{0}:{1}:{2}".format('para', map.exon_id_2, map.exon_known_2)
                    headers.append(seqname)
                    sequences[seqname] = pepseq

                fasta_fnm = "{0}/{1}_{2}_{3}.fa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known)
                output_fasta (fasta_fnm, headers, sequences)

                if (len(headers) <=1 ):
                    print "single species in the alignment (?)"
                    no_paralogues += 1
                    continue

                # align
                afa_fnm  = "{0}/{1}_{2}_{3}.afa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known)
                mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm)
                ret      = commands.getoutput(mafftcmd)

                # read in the alignment
                inf = erropen(afa_fnm, "r")
                if not inf:
                    print gene_id
                    continue
                template_seq_seen = False
                for record in SeqIO.parse(inf, "fasta"):
                    ### store the alignment as bitstring
                    # Generate the bitmap
                    bs         = Bits(bin='0b' + re.sub("[^0]","1", str(record.seq).replace('-','0')))
                    msa_bitmap = bs.tobytes()
                    # Retrieve information on the cognate
                    label, cognate_exon_id, cognate_exon_known = record.id.split(':')
                    if (label == 'template'):
                        template_seq_seen = True
                    # Write the bitmap to the database
                    #print "updating: ", template_exon.exon_id
                    store_or_update(cursor, "para_exon_map", {"cognate_exon_id"    :cognate_exon_id,
                                                         "cognate_exon_known" :cognate_exon_known,
                                                         "exon_id"            :template_exon.exon_id,
                                                         "exon_known"         :template_exon.is_known},
                                    {"msa_bitstring":MySQLdb.escape_string(msa_bitmap)})
                inf.close()
                ok += 1
                commands.getoutput("rm "+afa_fnm+" "+fasta_fnm)
            if verbose: print " time: %8.3f\n" % (time()-start);
 
        outstr  =  species + " done \n"
        outstr +=  "tot: %d   ok: %d  \n" % (tot,  ok)
        outstr +=  "no maps       %d  \n" % no_pepseq
        outstr +=  "no pepseq     %d  \n" % no_pepseq
        outstr +=  "no paralogues %d  \n" % no_paralogues
        outstr += "\n"
        print outstr
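Exemple #44
0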
params = {}

msgLen, genDegree, generator = taskParameters.split('|')
crcWidth = len(generator) - 1
msgLen = int(msgLen)

#print(str(msgLen)+" "+str(genDegree)+" " +str(generator)+" "+genString)

##########################################
######## GENERATE THE TESTVECTORS ########
##########################################
numVectors = randrange(10, 20)
testVectors = []

for i in range(0, numVectors):
    msg = Bits(uint=randrange(0, 2**msgLen), length=msgLen).bin
    crc = genCRC(msg, generator)

    testVectors.append('("{0}","{1}")'.format(msg, crc))

for i in range(numVectors - 1):
    testVectors[i] += ","

testPattern = ("\n" + 12 * " ").join(testVectors)  #format and join
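
# For reference, a minimal long-division sketch of what a CRC generator like
# genCRC (not shown here) computes -- assumption: msg and generator are
# '0'/'1' strings and the remainder has width len(generator)-1:
def crc_remainder(msg, generator):
    width = len(generator) - 1
    reg = list(msg + '0' * width)          # message with zero padding
    for i in range(len(msg)):
        if reg[i] == '1':                  # XOR the generator in at each set bit
            for j, g in enumerate(generator):
                reg[i + j] = str(int(reg[i + j]) ^ int(g))
    return ''.join(reg[-width:])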

##########################################
## SET PARAMETERS FOR TESTBENCH TEMPLATE #
##########################################
params.update({
    "CRCWIDTH": crcWidth,
    "MSGLEN": msgLen,
Exemple #45
0
def hamming_distance(str1, str2):
    """Calculate the hamming distance between two byte strings. """
    bit_str1 = BitArray(str1)
    bit_str2 = BitArray(str2)
    return (Bits("0b" + bit_str2.bin) ^ Bits("0b" + bit_str1.bin)).count(True)
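
# e.g. hamming_distance(b'this is a test', b'wokka wokka!!!') == 37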
Exemple #46
0
 def encode(self, value):
     '''
     :param value: value to encode
     '''
     encoded = strToBytes(value) + b'\x00'
     return Bits(bytes=encoded)
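Exemple #47
0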
def make_exon_alignment(cursor, ensembl_db_name, human_exon_id, human_exon_known, mitochondrial, 
                        min_similarity,  flank_length, first_human_exon = True):

    sequence_pep = {}
    sequence_dna = {}
    shortest_l = -1 # Uninitialized  leading padding length
    shortest_r = -1 # Uninitialized trailing padding length

    pep_aln_length = 0
    dna_aln_length = 0
    # find all other exons that map to the human exon
    maps    = get_maps(cursor, ensembl_db_name, human_exon_id, human_exon_known)
    maps    = filter (lambda m: not m.exon_id_2 is None, maps)
    maps_sw = filter (lambda m: m.source=='sw_sharp' or m.source=='usearch', maps)

    for map in maps:

        if map.similarity < min_similarity: continue
        # get the raw (unaligned) sequence for the exon that maps onto human
        exon_seqs = get_exon_seqs(cursor, map.exon_id_2, map.exon_known_2, ensembl_db_name[map.species_2])
        if (not exon_seqs):
            #print " exon_seqs for" , map.source
            continue
        [pepseq, pepseq_transl_start, 
         pepseq_transl_end, left_flank, right_flank, dna_seq] = exon_seqs[1:]

        # rpl11 starts with an exon that translates into 2 aa's,
        # rpl10A has a single methionine (or so they say) followed by a split codon
        # *supposedly there is evidence at the protein level
        # but will this give me tons of junk elsewhere? ...
        pepseq_noX = pepseq.replace ('X','')
        if  len(pepseq_noX)<3:
            # if this is the first exon, and if it starts with M, we'll let it off the hook
            # and then if it's human, we'll also salvage it at any price
            if (first_human_exon and pepseq_noX and pepseq_noX[0] == 'M') or map.species_2=='homo_sapiens':
                pass
            else:
                continue
       
        # check
        dnaseq  = Seq (dna_seq[pepseq_transl_start:pepseq_transl_end], generic_dna)
        if (mitochondrial):
            pepseq2 = dnaseq.translate(table="Vertebrate Mitochondrial").tostring()
        else:
            pepseq2 = dnaseq.translate().tostring()
        

        if (not pepseq == pepseq2):
            continue
            
        # inflate the compressed sequence
        if not map.bitmap:
            continue

        bs = Bits(bytes=map.bitmap)
        if (not bs.count(1) == len(pepseq)): continue # check bitmap has correct number of 1s
        usi = iter(pepseq)
        #reconst_pepseq = "".join(('-' if c=='0' else next(usi) for c in bs.bin))
        reconst_pepseq = ''
        for c in bs.bin:
            if c == '0': reconst_pepseq += '-'
            else:        reconst_pepseq += next(usi)

        # come up with a unique name for this sequence
        species       = map.species_2
        # let's also have the start in gene here - might make our lives easier later
        exon2 = get_exon (cursor, map.exon_id_2, map.exon_known_2, ensembl_db_name[species])
        sequence_name = species + "_" + str(map.exon_id_2)+"_"+str(map.exon_known_2)+"_"+str(exon2.start_in_gene)


        if reconst_pepseq: 
            sequence_pep[sequence_name] = reconst_pepseq
            pep_aln_length = len(reconst_pepseq)

            reconst_ntseq = expand_pepseq (reconst_pepseq, exon_seqs[1:], flank_length)
            if reconst_ntseq: 
                sequence_dna[sequence_name] = reconst_ntseq
                dna_aln_length = len(reconst_ntseq)

    # strip common gaps
    sequence_stripped_pep = strip_gaps (sequence_pep)
    if not sequence_stripped_pep:  
        c=inspect.currentframe()
        #print " in %s:%d" % ( c.f_code.co_filename, c.f_lineno)
        return ['','']
    # strip common gaps
    sequence_stripped_dna = strip_gaps (sequence_dna)
    if not sequence_stripped_dna:  
        c=inspect.currentframe()
        #print " in %s:%d" % ( c.f_code.co_filename, c.f_lineno)
        return ['', '']

    return [sequence_stripped_pep, sequence_stripped_dna]
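
# Self-contained sketch of the bitmap round trip used above (the names here
# are illustrative, not from the original module):
from bitstring import Bits

def pepseq_to_bitmap(aligned_seq):
    # '1' per residue, '0' per gap; tobytes() zero-pads to a byte boundary
    return Bits(bin='0b' + ''.join('0' if c == '-' else '1' for c in aligned_seq)).tobytes()

def bitmap_to_pepseq(bitmap, raw_seq):
    bs  = Bits(bytes=bitmap)
    usi = iter(raw_seq)
    # the padding zeros come back as extra trailing gaps, as noted earlier
    return ''.join('-' if c == '0' else next(usi) for c in bs.bin)

# round trip: 'MV-AE-' -> b'\xd8' -> 'MV-AE---' (two padding gaps appended)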
Exemple #48
0
 def encode(self, value, length, signed):
     return Bits(bytes=strToBytes(self._fmt % value))
Exemple #49
0
 def testRfind(self):
     a = Bits('0b11101010010010')
     b = a.rfind('0b010')
     self.assertEqual(b[0], 11)
Exemple #50
0
 def encode(self, value):
     '''
     :param value: value to encode
     '''
     packed = pack(self.fmt, value)
     return Bits(bytes=packed)
Exemple #51
0
 def testCut(self):
     s = Bits(30)
     for t in s.cut(3):
         self.assertEqual(t, [0] * 3)
Exemple #52
0
def bits_to_signed_int(s):
    return Bits(bin=s).int
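Exemple #53
0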
def _lzw_unpack_compressed(packed_ints):
	ints_count = len(packed_ints) * 8 // _LZW_NUM_SIZE
	bits = Bits(bytes=packed_ints, length=ints_count * _LZW_NUM_SIZE)
	return [chunk.uint for chunk in bits.cut(_LZW_NUM_SIZE)]
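
# A hedged inverse of the unpacker above (assumption: every value fits in
# _LZW_NUM_SIZE bits; tobytes() zero-pads, and the unpacker's floor division
# drops any partial trailing chunk):
def _lzw_pack_compressed(ints):
	bits = Bits().join(Bits(uint=n, length=_LZW_NUM_SIZE) for n in ints)
	return bits.tobytes()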
Exemple #54
0
    def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param n_subsets: Number of subsets to decode
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        subsets = [[] for x in range(n_subsets)]
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    _subsets = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    raw_vals = [subset[0].raw_value for subset in _subsets]

                    if len(set(raw_vals)) != 1:
                        raise ValueError("Encountered different reference values for different subsets: %s", raw_vals)

                    ref_value = raw_vals[0]
                    top_bit_mask = (1 << op_crf.bits()-1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)

                    overlay_descriptor = ElementDescriptor(descriptor.code, descriptor.length, descriptor.scale, ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    vals = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    for i,ss in enumerate(vals):
                        subsets[i].extend(ss)

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    ref_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    ref_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length

                n_bits = Bits._readuint(bits, 6, bits.pos)
                bits.pos += 6
                
                for i in range(n_subsets):
                    if descriptor.unit == 'CCITTIA5':
                        n_chars = n_bits
                        if n_chars:
                            raw_value = Bits._readhex(bits, n_chars*8, bits.pos)
                            bits.pos += n_chars*8
                            value = _decode_raw_value(raw_value, descriptor, operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor, operators)
                    else:
                        if n_bits:
                            increment = Bits._readuint(bits, n_bits, bits.pos)
                            bits.pos += n_bits
                            if increment ^ ((1 << n_bits)-1) == 0: # Missing value, all-ones
                                value = _decode_raw_value((1 << descriptor.length)-1, descriptor, operators)
                            else:
                                value = _decode_raw_value(ref_value + increment, descriptor, operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor, operators)
                    subsets[i].append(value)
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregations = [[] for x in range(n_subsets)]
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode_compressed(bits, itertools.islice(descriptors, 1), n_subsets, {}, {})[0][0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(itertools.islice(descriptors, n_fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                    for _ in range(count):
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                else:
                    raise ValueError("Unexpected delayed replication element %s" %bval)

                for subset_idx in range(n_subsets):
                    subsets[subset_idx].append(aggregations[subset_idx])
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.opcode in (1,2,3,4,7):
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
                else:
                    raise NotImplementedError("Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d" %op.opcode)
            elif isinstance(descriptor, SequenceDescriptor):
                comp = decode_compressed(bits, iter(descriptor.descriptors), n_subsets, operators, descriptor_overlay)
                for i,subset in enumerate(comp):
                    subsets[i].extend(subset)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
        return subsets
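
# The compressed-element layout decoded above, as a minimal standalone sketch
# (assumption: 'bits' is a positioned ConstBitStream; missing-value handling
# and CCITTIA5 strings are omitted):
def read_compressed_uint(bits, width, n_subsets):
    ref    = bits.read('uint:%d' % width)   # reference value, full width
    n_bits = bits.read('uint:6')            # per-subset increment width
    if n_bits == 0:
        return [ref] * n_subsets            # all subsets share the reference
    return [ref + bits.read('uint:%d' % n_bits) for _ in range(n_subsets)]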
Exemple #55
0
 def encode(self, value):
     encoded = self._func(strToBytes(value))
     return Bits(bytes=encoded)
Exemple #56
0
    def decode(bits, descriptors, operators, descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        values = []
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)
            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                    bits.pos += op_crf.bits()
                    top_bit_mask = (1 << op_crf.bits()-1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)
                    overlay_descriptor = ElementDescriptor(descriptor.code, descriptor.length, descriptor.scale, ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue
                
                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    associated_value = Bits._readuint(bits, op_aaf.bits(), bits.pos)
                    bits.pos += op_aaf.bits()
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptor = ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")
                    values.append(BufrValue(associated_value, associated_value, dummy_descriptor))

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    raw_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    raw_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length
                values.append(_decode_raw_value(raw_value, descriptor, operators))
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregation = []
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode(bits, itertools.islice(descriptors, 1), {}, {})[0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(itertools.islice(descriptors, n_fields))
                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        aggregation.append(decode(bits, iter(field_descriptors), operators, descriptor_overlay))
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    repeated_values = decode(bits, iter(field_descriptors), operators, descriptor_overlay)
                    for _ in range(count):
                        aggregation.append(repeated_values)
                else:
                    raise ValueError("Unexpected delayed replication element %s" %bval)
                values.append(aggregation)
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.immediate:
                    if op.opcode == OpCode.SIGNIFY_CHARACTER:
                        raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                        bits.pos += op.bits()
                        char_descriptor = ElementDescriptor(fxy2int(op.code), op.bits(), 0, 0, "CHARACTER INFORMATION", "CCITTIA5")
                        value = _decode_raw_value(raw_value, char_descriptor, {})
                        values.append(value)
                    elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                        base_descriptor = next(itertools.islice(descriptors, 1))  # islice objects are not subscriptable
                        mod_descriptor = ElementDescriptor(base_descriptor.code, op.bits(), base_descriptor.scale, base_descriptor.ref, base_descriptor.significance, base_descriptor.unit)
                        values.append(decode(bits, iter([mod_descriptor]), {}, {})[0])  # values is a list; decode just the re-sized descriptor
                        
                    else:
                        raise NotImplementedError("Unknown immediate operator: %s" % str(descriptor))
                else:
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
            elif isinstance(descriptor, SequenceDescriptor):
                seq = decode(bits, iter(descriptor.descriptors), operators, descriptor_overlay)
                values.extend(seq)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
        return values
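
# Sign convention used by both decoders for CHANGE_REFERENCE_VALUES: the top
# bit of the raw field flags a negative reference value (illustrative helper):
def _crf_to_int(raw, width):
    top_bit_mask = 1 << (width - 1)
    return -(raw & ~top_bit_mask) if raw & top_bit_mask else raw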