예제 #1
0
def connect_all(matrix, need_log=False):
    """
    introduction: Attach a row index to every row of a two-dimensional matrix.

    :param matrix: Data from input.
                   Type: Two-dimensional list(int).

    :param need_log: When True, emit a start message through log.output and
                     report each processed row to the monitor.

    :return new_matrix: One entry per input row, each being the result of
                        connect(row number, row, index width).
                        Type: Two-dimensional list(int).
    """
    progress = monitor.Monitor()
    row_count = len(matrix)
    # Index width = number of binary digits of the row count ("0b" prefix removed).
    index_width = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    indexed_rows = []
    for number, payload in enumerate(matrix):
        if need_log:
            progress.output(number, row_count)
        indexed_rows.append(connect(number, payload, index_width))

    progress.restore()

    return indexed_rows
예제 #2
0
def divide_all(matrix, need_log=False):
    """
    introduction: Split every row of the matrix into its index part and its data part.

    :param matrix: Rows that each carry an index followed by data.
                   Type: Two-dimensional list(int).

    :param need_log: When True, emit a start message through log.output and
                     report each processed row to the monitor.

    :returns indexs, datas: The index and the data obtained from each row by
                            divide(row, index width), in input order.
                            Type: One-dimensional list(int), Two-dimensional list(int).
    """
    progress = monitor.Monitor()
    row_count = len(matrix)
    # Index width = number of binary digits of the row count ("0b" prefix removed).
    index_width = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    found_indexes, found_data = [], []

    for number, combined in enumerate(matrix):
        if need_log:
            progress.output(number, row_count)
        row_index, row_data = divide(combined, index_width)
        found_indexes.append(row_index)
        found_data.append(row_data)

    progress.restore()

    return found_indexes, found_data
예제 #3
0
def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Write a set of DNA sequences to a text file, one sequence per line.

    :param path: File path.
                  Type: string

    :param dna_sequences: Generated DNA sequences; each entry is joined into
                          one string before it is written.
                          Type: one-dimensional list(string)

    :param need_log: When True, log the target path and report each written
                     sequence to the monitor.

    :return dna_sequences: The input sequences, unchanged, on success.
                           None when an IOError was caught (an error is logged).
    """

    progress = monitor.Monitor()

    try:
        with open(path, "w") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)
            sequence_count = len(dna_sequences)
            for number, sequence in enumerate(dna_sequences):
                if need_log:
                    progress.output(number, sequence_count)
                file.write("".join(sequence) + "\n")
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
        return None

    return dna_sequences
예제 #4
0
def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    :param indexes: The indexes of data set; indexes[i] is the destination row of data_set[i].

    :param data_set: The disordered data set, the locations of this are corresponding to parameter "indexes".

    :param need_log: When True, emit a start message through log.output and
                     report each processed entry to the monitor.

    :returns matrix: Binary list in correct order. It has len(indexes) rows.
                      Rows that no valid index points at stay filled with zeros.
                      Type: Two-dimensional list(int).
    """
    m = monitor.Monitor()

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    row_count = len(indexes)
    # data_set[0] is only inspected when there is at least one row to build,
    # so an empty call still returns [] instead of raising.
    width = len(data_set[0]) if row_count else 0
    matrix = [[0] * width for _ in range(row_count)]

    for position, target in enumerate(indexes):
        if need_log:
            m.output(position, row_count)
        # The bound check must apply to the destination index, not to the loop
        # counter (which is always in range). Otherwise a negative index
        # silently overwrites a row counted from the end, and an oversized
        # index raises IndexError.
        if 0 <= target < row_count:
            matrix[target] = data_set[position]

    m.restore()

    return matrix
예제 #5
0
def get_yyc_rule_by_index(index, need_logs=False):
    """
    introduction: Enumerate every rule accepted by _check and return the info of the one at position index.

    Candidates are visited base by base ("A", "T", "C", "G"); for each base,
    every 4-bit pattern is combined with every 16-bit pattern (reshaped to 4x4).
    Accepted candidates are numbered consecutively in visiting order.

    :param index: Position of the wanted rule among the accepted rules.

    :param need_logs: When True, print status messages and report every step to the monitor.

    :return: The value of get_info() of the selected rule.

    :raises ValueError: If index is negative or not smaller than the number of accepted rules.
    """
    first_patterns = ["".join(bits) for bits in itertools.product("01", repeat=4)]
    second_patterns = ["".join(bits) for bits in itertools.product("01", repeat=16)]

    m = monitor.Monitor()

    if need_logs:
        print("Find all the available Yin-Yang rules.")

    total_steps = len(first_patterns) * len(second_patterns) * 4
    candidates = itertools.product(["A", "T", "C", "G"], first_patterns, second_patterns)

    rules = []
    for step, (base, first, second) in enumerate(candidates, start=1):
        # Fresh lists for every candidate so rule objects never share state.
        rule1 = [int(bit) for bit in first]
        rule2 = numpy.array([int(bit) for bit in second]).reshape(4, 4).tolist()
        if _check(rule1, rule2):
            # The rule number equals how many rules were accepted before it.
            rules.append(YYCRule(rule1, rule2, base, len(rules)))

        if need_logs:
            m.output(step, total_steps)

    if index < 0 or index >= len(rules):
        raise ValueError("We have " + str(len(rules)) + " rules, index " + str(index) + " is wrong!")

    selected = rules[index]

    if need_logs:
        print("Current Rule is " + str(selected.get_info()) + ".")

    return selected.get_info()
예제 #6
0
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    :param path: File path.
                  Type: string

    :param need_log: When True, log the source path and report each read line to the monitor.

    :return dna_sequences: One entry per line of the file; each entry is the
                           list of characters of that line without its line terminator.
                           None when an IOError was caught (an error is logged).
                           Type: two-dimensional list(string)
    """

    m = monitor.Monitor()

    dna_sequences = []

    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()
            line_count = len(lines)
            for index, line in enumerate(lines):
                if need_log:
                    m.output(index, line_count)
                # Remove the terminator only when it is really there: the last
                # line of a file may lack "\n", and blindly dropping the final
                # character would lose one nucleotide.
                if line.endswith("\n"):
                    line = line[:-1]
                dna_sequences.append(list(line))

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
예제 #7
0
    def __init__(self,
                 homopolymer=4,
                 gc_content=0.2,
                 redundancy=0.5,
                 c_dist=0.1,
                 delta=0.5,
                 header_size=4,
                 recursion_depth=10000000,
                 decode_packets=None):
        """
        introduction: The initialization method of FC.

        Stores the configuration, validates it through _init_check, then
        resets the run-time state (prng, file_size, monitor).

        :param homopolymer: maximum length of homopolymer, type = int.
        :param gc_content: the fraction of gc content above/below 0.5 (0.1 means 0.4-0.6).
        :param redundancy: artificial redundancy for decode successfully (0.5 generate 50% more fragments),type = float.
        :param c_dist: Degree distribution tuning parameter,type = float.
        :param delta: Degree distribution tuning parameter,type = float.
        :param header_size: number of bytes for the header, type = int.
        :param recursion_depth: stored as-is; not used in this constructor
                                (presumably a recursion limit - confirm against its users).
        :param decode_packets: bit segments in the encoding process, type = int.
        """
        # Sequence constraints.
        self.homopolymer = homopolymer
        self.gc_content = gc_content

        # Degree distribution tuning.
        self.c_dist = c_dist
        self.delta = delta

        # Packet layout and volume.
        self.redundancy = redundancy
        self.header_size = header_size
        self.decode_packets = decode_packets
        self.recursion_depth = recursion_depth

        # Validate only after every configuration attribute is in place.
        self._init_check()

        # Run-time state.
        self.monitor = monitor.Monitor()
        self.file_size = 0
        self.prng = None
예제 #8
0
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Read a file as bits and lay them out in a matrix of fixed-width rows.

    :param path: File path.
                  Type: string

    :param segment_length: The binary segment length used for DNA sequence generation.
                           Considering current DNA synthesis technique limitation,
                           we usually set 120 as default segment length.

    :param need_log: When True, log the source path, report every read byte to
                     the monitor, and warn when the index share may be high.

    :return matrix, size: The bit matrix (each row holds segment_length bits, most
                          significant bit of each byte first, the last row padded
                          with zeros) and the file size in bytes.
                          None when an IOError was caught (an error is logged).
                          Type: two-dimensional list(int), int
    """

    progress = monitor.Monitor()
    try:

        # Open selected file
        with open(path, mode="rb") as file:

            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            size = os.path.getsize(path)

            # Zero-filled storage with enough rows for every bit of the file.
            row_count = math.ceil(size * 8 / segment_length)
            matrix = [[0 for _ in range(segment_length)] for _ in range(row_count)]

            bit_position = 0
            for byte_index in range(size):
                if need_log:
                    progress.output(byte_index, size)
                # Read a file as bytes
                value = struct.unpack("B", file.read(1))[0]
                # Most significant bit first, exactly eight bits per byte.
                for shift in range(7, -1, -1):
                    row, col = divmod(bit_position, segment_length)
                    matrix[row][col] = (value >> shift) & 1
                    bit_position += 1

        index_width = len(bin(len(matrix))) - 2
        if need_log and index_width * 7 > segment_length:
            log.output(log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
                       "The proportion of index in whole sequence may be high. \n"
                       "It is recommended to increase the length of output DNA sequences "
                       "or to divide the file into more segment pools")

        return matrix, size
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
예제 #9
0
    def __init__(self, fixed_huffman=True):
        """
        introduction: The initialization method of Huffman Codec.

        :param fixed_huffman: Declare whether to use the Huffman dictionary in Goldman's paper.
                               In order to reduce the possible loss of function storage, we recommend using this dictionary.
        """
        # Configuration.
        self.fixed_huffman = fixed_huffman

        # State that starts out empty; nothing is computed in the constructor.
        self.huffman_tree = None
        self.file_size = 0
        self.segment_length = 0

        self.m = monitor.Monitor()
예제 #10
0
    def __init__(self, mapping_rule=None):
        """
        introduction: The initialization method of Simple Codec.

        :param mapping_rule: Mapping between bases and numbers.
                              There can be two settings:
                              (1) Two bases correspond to a number (0 or 1): i.e. AT-0, CG-1.
                              (2) Each base corresponds to a number: i.e. A-00, T-01, C-10, G-11.
                              Any falsy value (None, empty list) selects the default [0, 1, 1, 0].
        """

        self.mapping_rule = mapping_rule if mapping_rule else [0, 1, 1, 0]

        # Validate once the mapping rule is stored.
        self._init_check()

        self.m = monitor.Monitor()
        self.file_size = 0
예제 #11
0
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    Bits are consumed row by row, packed eight at a time (first bit is the most
    significant) and written as bytes until exactly `size` bytes were written.
    Remaining bits are padding and are ignored.

    :param path: File path.
                  Type: string

    :param matrix: A matrix in which each row represents a binary segment that will be used for DNA sequence generation.
                    Every row is read up to the length of the first row.
                    Type: two-dimensional list(int)

    :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                  At most this many bytes are written.
                  Type: int

    :param need_log: When True, log the target path and report each processed row to the monitor.
    """
    m = monitor.Monitor()

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            # Change bit to byte (8 -> 1), and write a file as bytes
            bit_count = 0
            current_byte = 0
            for row in range(len(matrix)):
                if need_log:
                    m.output(row, len(matrix))
                for col in range(len(matrix[0])):
                    bit_count += 1
                    current_byte = current_byte * 2 + matrix[row][col]
                    if bit_count == 8:
                        # Strictly positive budget: "size >= 0" would emit one
                        # byte too many when the matrix carries padding bits.
                        if size > 0:
                            file.write(struct.pack("B", int(current_byte)))
                            size -= 1
                        # Always start a new byte so the accumulator cannot
                        # grow without bound once the budget is used up.
                        bit_count = 0
                        current_byte = 0
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
예제 #12
0
    def __init__(self, base_values=None, need_log=False):
        """
        introduction: The initialization method of Grass Codec.

        :param base_values: Assignment of 48 base triplets (0-47).
                             Only entries whose value lies in 0-46 are kept, each paired
                             with the triplet of inherent.gc_codes at the same position;
                             every other value and its triplet are discarded.
                             Defaults to the values 0..47 in order.

        :param need_log: Accepted but not used by this constructor.
        """

        if base_values is None:
            base_values = list(range(48))

        # (triplet, value) pairs for the positions whose value is usable.
        kept = [(inherent.gc_codes[position], value)
                for position, value in enumerate(base_values)
                if 0 <= value < 47]

        self.mapping_rule = [[triplet for triplet, _ in kept],
                             [value for _, value in kept]]
        self.segment_length = 0
        self.file_size = 0
        self.m = monitor.Monitor()