def sam2graph(sam):
    """Group the alignments of a BAM / SAM file by reference name.

    Every record yielded by ``pysam.AlignmentFile(sam).fetch()`` becomes one
    ``Align`` object linking a freshly created query ``Read`` to the ``Read``
    of its reference. Reference ``Read`` objects are created once per
    reference name and get their sequence from the module-level ``fastaDict``
    (not defined in this function).

    Returns a dict mapping each reference name to the set of its alignments.
    """
    references = {}
    graph = {}
    with pysam.AlignmentFile(sam) as handle:
        for record in handle.fetch():
            # Query side: a new Read object for every record.
            query = Read(record.query_name)
            query.sequence = record.query_sequence

            # Reference side: reuse the Read built for this name, if any.
            ref_name = record.reference_name
            if ref_name in references:
                ref = references[ref_name]
            else:
                ref = Read(ref_name)
                ref.sequence = fastaDict[ref_name]
                references[ref_name] = ref
                graph[ref_name] = set()

            # "-" marks a reverse-strand alignment, "+" a forward one.
            strand = "-" if record.is_reverse else "+"
            graph[ref_name].add(
                Align(query, ref,
                      record.query_alignment_start,
                      record.query_alignment_end,
                      record.reference_start,
                      record.reference_end,
                      record.cigarstring,
                      strand))
    return graph
def sam2graph(sam, fasta):
    """Convert a BAM / SAM file into an alignment-set dictionary.

    Args:
        sam: file to open with ``pysam.AlignmentFile``.
        fasta: object exposing ``transcriptMap`` (looked up by reference
            name) and ``genes`` (looked up by the mapped key); each gene
            must expose a ``transcripts`` mapping keyed by reference name.

    Returns:
        dict mapping each reference name to the set of ``Align`` objects
        built from the mapped records of the file.

    Raises:
        SystemExit: when a reference name cannot be resolved through
            ``fasta``. The original code printed the name and called
            ``exit()``, which ends the process with status 0; the error now
            carries a message and a non-zero status.
    """
    refDict = {}
    targetDict = {}
    G = {}
    with pysam.AlignmentFile(sam) as samfile:
        for read in samfile.fetch():
            # Flag bit 0x4 set: the record is unmapped, nothing to align.
            if read.flag & 4:
                continue

            # Fetching read and reference names
            targetName = read.query_name
            refName = read.reference_name

            # One Read object per query name, shared by all its records.
            target = targetDict.get(targetName)
            if target is None:
                target = targetDict[targetName] = Read(targetName)

            # Since one read can be mapped on multiple transcripts,
            # sequences are only stored on the "preferred" mapping, so the
            # target is only filled when this record carries a sequence.
            if read.query_sequence:
                target.sequence = read.query_sequence

            reference = refDict.get(refName)
            if reference is None:
                reference = Read(refName)
                try:
                    refGene = fasta.genes[fasta.transcriptMap[refName]]
                except KeyError as err:
                    raise SystemExit(
                        "Unknown reference transcript: {}".format(refName)
                    ) from err
                reference.sequence = refGene.transcripts[refName]
                refDict[refName] = reference
                G[refName] = set()

            # Alignment data
            s1 = read.query_alignment_start
            e1 = read.query_alignment_end
            s2 = read.reference_start
            e2 = read.reference_end
            cigar = read.cigarstring
            relPos = "-" if read.is_reverse else "+"
            align = Align(target, reference, s1, e1, s2, e2, cigar, relPos)

            G[refName].add(align)
    return G
def main():
    """Read fragments from stdin and print their overlap graph as markdown.

    The first non-blank line is the command (``reads`` or ``read-pairs``);
    every other non-blank line is one fragment. For ``read-pairs`` a line is
    split on ``|`` and fields 0, 2 and integer field 1 are passed to
    ``Kdmer`` in that order. A line that occurs ``c`` times yields ``c``
    fragments, built with the occurrence index 0..c-1 as second argument.

    The closing markdown wrapper is printed even when an error occurs.

    Raises:
        ValueError: if stdin holds no non-blank line or the command is not
            recognised (previously an ``IndexError`` / a bare ``raise``).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        if not lines:
            raise ValueError('No input: expected a command line ("reads" or "read-pairs").')
        command, *lines = lines
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of three times
                for i in range(c):
                    frags.append(ReadPair(Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            raise ValueError(f'Unknown command {command!r}: expected "reads" or "read-pairs".')
        graph = to_overlap_graph(frags)
        print(f'Given the fragments {lines}, the overlap graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
def main():
    """Print how often each sequenced fragment probably occurs in the genome.

    Each non-blank stdin line is split on whitespace; the first field is the
    fragment and the second the number of times it was scanned. The fragment
    is wrapped in a single ``Read`` that is repeated that many times, and the
    resulting list is handed to
    ``calculate_fragment_occurrence_probabilities``. Output is wrapped in a
    markdown ``<div>`` whose closing part is printed even on error.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        entries = []
        while True:
            try:
                text = input().strip()
            except EOFError:
                break
            if text:
                entries.append(text.split())

        print('Sequenced fragments:', end='\n\n')
        for entry in entries:
            print(f' * {entry[0]} was scanned in {entry[1]} times.')

        raw_reads = []
        for entry in entries:
            # One Read instance per input line, repeated "count" times.
            raw_reads.extend([Read(entry[0])] * int(entry[1]))

        occurrence_probabilities = calculate_fragment_occurrence_probabilities(raw_reads)
        print('', end='\n\n')
        print('Probability of occurrence in genome:', end='\n\n')
        for fragment, appearances in occurrence_probabilities.items():
            print(f' * {fragment} probably has {appearances} appearances in the genome.')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Beispiel #5
0
def main():
    """Read fragments from stdin; print their de Bruijn graph and a cycle.

    The first non-blank line is the command (``reads`` or ``read-pairs``);
    every other non-blank line is one fragment. For ``read-pairs`` a line is
    split on ``|`` and fields 0, 2 and integer field 1 are passed to
    ``Kdmer`` in that order. A line that occurs ``c`` times yields ``c``
    fragments, built with the occurrence index 0..c-1 as second argument.
    The Eulerian walk starts at the prefix of the first fragment.

    The closing markdown wrapper is printed even when an error occurs.

    Raises:
        ValueError: if stdin holds no non-blank line or the command is not
            recognised (previously an ``IndexError`` / a bare ``raise``).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        if not lines:
            raise ValueError('No input: expected a command line ("reads" or "read-pairs").')
        command, *lines = lines
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of three times
                for i in range(c):
                    frags.append(ReadPair(Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            raise ValueError(f'Unknown command {command!r}: expected "reads" or "read-pairs".')
        graph = to_debruijn_graph(frags)
        print(f'Given the fragments {lines}, the de Bruijn graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print(f'... and a Eulerian cycle is ...', end="\n\n")
        path = walk_eulerian_cycle(graph, frags[0].prefix())
        print(f'{" -> ".join([str(p) for p in path])}')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Beispiel #6
0
    def append_overlap(self: ReadPair, other: ReadPair, skip: int = 1) -> ReadPair:
        """Merge this read-pair with ``other`` into one overlapping read-pair.

        Heads and tails are merged independently by wrapping each in a
        ``Read`` and calling ``Read.append_overlap``; the gap of the result
        is this pair's ``d`` reduced by ``skip``. The returned pair records
        both inputs as its ``source``.
        """
        # NOTE(review): `skip` only adjusts d; it is not forwarded to the
        # Read.append_overlap calls below -- confirm this is intended.
        merged_head = Read(self.data.head).append_overlap(Read(other.data.head)).data
        merged_tail = Read(self.data.tail).append_overlap(Read(other.data.tail)).data

        # WARNING: the new d may go negative. A negative d means that instead
        # of a gap between head and tail there is an OVERLAP between them. To
        # get rid of the overlap, remove either the last d chars from the head
        # or the first d chars from the tail.
        merged = Kdmer(merged_head, merged_tail, self.d - skip)
        return ReadPair(merged, source=('overlap', [self, other]))
Beispiel #7
0
 def create_string(self, min_size, max_size, min_distance, max_distance,
                   error, overlap_chance):
     """Fill ``self.reads`` with random-length reads cut from H1 or H2.

     Starting at position 0 and while the position is below ``self.size``:
     draw a read length in [min_size, max_size], pick ``self.H1`` or
     ``self.H2`` with equal probability, and store the non-empty slice as a
     ``Read``. With probability ``overlap_chance`` the position is left
     unchanged; otherwise it advances by a random distance in
     [min_distance, max_distance], capped at ``read_size - 1``.
     """
     position = 0
     while position < self.size:
         length = random.randint(min_size, max_size)
         source = self.H1 if random.random() < 0.5 else self.H2
         fragment = source[position:position + length]
         if len(fragment) > 0:
             # error will be 0 for now
             self.reads.append(Read(position, fragment, error))
         if random.random() <= overlap_chance:
             continue
         step = random.randint(min_distance, max_distance)
         position += min(step, length - 1)
def main():
    """Read fragments from stdin; print the overlap graph and its paths.

    The first non-blank line is the command (``reads`` or ``read-pairs``);
    every other non-blank line is one fragment. For ``read-pairs`` a line is
    split on ``|`` and fields 0, 2 and integer field 1 are passed to
    ``Kdmer`` in that order. A line that occurs ``c`` times yields ``c``
    fragments, built with the occurrence index 0..c-1 as second argument.
    Hamiltonian paths are collected from every node and de-duplicated.

    The closing markdown wrapper is printed even when an error occurs.

    Raises:
        ValueError: if stdin holds no non-blank line or the command is not
            recognised (previously an ``IndexError`` / a bare ``raise``).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        if not lines:
            raise ValueError(
                'No input: expected a command line ("reads" or "read-pairs").')
        command, *lines = lines
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of three times
                for i in range(c):
                    frags.append(
                        ReadPair(
                            Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            raise ValueError(
                f'Unknown command {command!r}: expected "reads" or "read-pairs".')
        graph = to_overlap_graph(frags)
        print(f'Given the fragments {lines}, the overlap graph is...',
              end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```', end="\n\n")
        print(f'... and the Hamiltonian paths are ...', end="\n\n")
        # Paths are turned into tuples so they can be de-duplicated in a set.
        all_paths = {
            tuple(path) for node in graph.get_nodes()
            for path in walk_hamiltonian_paths(graph, node)
        }
        for path in all_paths:
            print(f' * {" -> ".join([str(p) for p in path])}')

    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Beispiel #9
0
def bamToHap(fasta, bim, bam, quality_threshold=14):
    """Print one tab-separated haplotype line per successfully processed read.

    Each item of ``bam`` is wrapped in ``Read(item, fasta)`` and passed to
    ``process_read`` together with ``bim`` and ``quality_threshold``. Reads
    whose processing raises are logged as warnings on the ``bamToHap``
    logger and skipped; the others are printed to stdout.

    Args:
        fasta: passed through to ``Read``.
        bim: passed through to ``process_read``.
        bam: iterable of records to process.
        quality_threshold: passed through to ``process_read``.

    Returns:
        None.
    """
    logger_bamToHap = logging.getLogger('bamToHap')
    logger_bamToHap.info(
        'bamToHap(fasta={}, bim={}, bam={}, quality_threshold={})'.format(
            fasta, bim, bam, quality_threshold))
    for x in bam:
        r = Read(x, fasta)
        try:
            read_bim_l, read_bim_r, read_error_rate, read_hap_str = process_read(
                r, bim, quality_threshold)
        # Logger has no ``WARNING`` method (that name is the level constant
        # on the logging module); ``warning`` is the call that logs.
        except KeyError as e:
            logger_bamToHap.warning('{} KeyError {}'.format(r.query_name, e))
        except Exception as e:
            logger_bamToHap.warning('{} {} {}'.format(r.query_name, type(e),
                                                      e))
        else:
            # chr, start, end, name, MAPQ, error rate, bim_l, bim_r, hapstr
            read_output = (r.reference_name, r.l.get_pos(), r.r.get_pos(),
                           r.query_name, r.mapping_quality, read_error_rate,
                           read_bim_l, read_bim_r, read_hap_str)
            print('\t'.join(str(field) for field in read_output))
Beispiel #10
0
def main():
    """Read fragments from stdin; print their balanced de Bruijn graph.

    The first non-blank line is the command (``reads`` or ``read-pairs``);
    every other non-blank line is one fragment. For ``read-pairs`` a line is
    split on ``|`` and fields 0, 2 and integer field 1 are passed to
    ``Kdmer`` in that order. A line that occurs ``c`` times yields ``c``
    fragments, built with the occurrence index 0..c-1 as second argument.
    The de Bruijn graph is passed through ``balance_graph`` and printed
    together with the head and tail nodes that call reports.

    The closing markdown wrapper is printed even when an error occurs.

    Raises:
        ValueError: if stdin holds no non-blank line or the command is not
            recognised (previously an ``IndexError`` / a bare ``raise``).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        if not lines:
            raise ValueError(
                'No input: expected a command line ("reads" or "read-pairs").')
        command, *lines = lines
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of three times
                for i in range(c):
                    frags.append(
                        ReadPair(
                            Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            raise ValueError(
                f'Unknown command {command!r}: expected "reads" or "read-pairs".')
        graph = to_debruijn_graph(frags)
        graph, head_nodes, tail_nodes = balance_graph(graph)
        print(
            f'Given the fragments {lines}, the artificially balanced de Bruijn graph is...',
            end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print(
            f'... with original head nodes at {head_nodes} and tail nodes at {tail_nodes}.'
        )
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Beispiel #11
0
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import slide_window

# Input file: first line is k, second line is the DNA string.
with open('/home/user/Downloads/dataset_240257_6(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k = int(lines[0].strip())
dna = lines[1].strip()

# One Read per window that slide_window yields for the DNA string.
reads = [Read(kmer) for kmer, _ in slide_window(dna, k)]
graph = to_debruijn_graph(reads)

# Print the adjacency list, leaving out nodes without outgoing edges.
for node, other_nodes in graph.get_all_outputs():
    other_nodes = list(other_nodes)
    if len(other_nodes) > 0:
        print(f'{node} -> {",".join(str(x) for x in other_nodes)}')
Beispiel #12
0
#!/usr/bin/env python
# -*- coding: utf8 -*-
from Read import Read
from Attendance import Attendance
from Email import Email
import sys

# Command-line dispatch: the first argument selects the action to run.
# print() is called with a single argument throughout so the output is the
# same under Python 2 and Python 3.
if len(sys.argv) > 1:
    PASSED_ARG = sys.argv[1]
    MODE = None

    if PASSED_ARG == "--register":
        # Read is started with mode 0 for registration.
        MODE = 0
        Read(MODE).start()
    elif PASSED_ARG == "--read":
        # Read is started with mode 1 for reading.
        MODE = 1
        Read(MODE).start()
    elif PASSED_ARG == "--offline":
        Attendance().markOfflineAttendance()
    elif PASSED_ARG == "--email":
        Email().send()
    elif PASSED_ARG == "--help":
        # Close the manual file deterministically instead of leaking it.
        with open("manual.txt", "r") as manual:
            print(manual.read())
    else:
        print("app: invalid option -- " + PASSED_ARG)
        print("Try python app.py --help for more information.")
else:
    print("Try python app.py --help for more information.")
from random import shuffle

from Read import Read
from ToOverlapGraphHash import to_overlap_graph
from Utils import enumerate_patterns

# Every pattern that enumerate_patterns(4, '01') yields becomes a Read.
segments = [Read(pattern) for pattern in enumerate_patterns(4, '01')]
print(segments)

graph = to_overlap_graph(segments)

# Print the adjacency list in random order.
graph_items_randomized = list(graph.get_all_outputs())
shuffle(graph_items_randomized)
for segment, other_segments in graph_items_randomized:
    other_segments = list(other_segments)
    joined = ",".join(str(x) for x in other_segments)
    print(f'{segment} -> {joined}')


def walk(path):
    """Depth-first extension of ``path`` over the module-level ``graph``.

    When the path is as long as ``len(graph)`` it is printed together with
    its stitched string. Otherwise every output of the last node that is not
    already on the path is tried in turn. ``path`` is mutated during the
    walk and restored before returning.
    """
    if len(path) == len(graph):
        print(f'{path} -> {path[0].stitch(path)}')
        return

    last = path[-1]
    for successor in graph.get_outputs(last):
        if successor in path:
            continue  # already visited on this path
        path.append(successor)
        walk(path)
        path.pop()

from typing import List

from Graph import Graph
from Read import Read
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Graph over the four 2-bit nodes; each pair below is one (from, to) edge.
graph = Graph()
for _src, _dst in (
        ('00', '00'),
        ('00', '01'),
        ('01', '10'),
        ('01', '11'),
        ('10', '00'),
        ('10', '01'),
        ('11', '10'),
        ('11', '11'),
):
    graph.insert_edge(Read(_src), Read(_dst))


def eularian_path_to_kmers(cycle_path: List[Read]) -> List[Read]:
    """Turn consecutive node pairs of a path into one Read per edge.

    For each node and its successor, the new Read's data is the node's data
    followed by the last element of the successor's data. A path with fewer
    than two nodes yields an empty list.
    """
    return [
        Read(node.data + successor.data[-1])
        for node, successor in zip(cycle_path, cycle_path[1:])
    ]


def do_kmers_cycle(reads: List[Read]) -> bool:
    """Report whether ``reads`` chain into a closed cycle.

    Returns True when the suffix of every read equals the prefix of the read
    after it, and the suffix of the last read equals the prefix of the
    first; False otherwise. The original fell off the end and returned None
    on success, contradicting the ``bool`` annotation.

    Raises:
        IndexError: if ``reads`` is empty.
    """
    for i in range(len(reads) - 1):
        if reads[i].suffix() != reads[i + 1].prefix():
            return False
    # Close the cycle: the last read must lead back into the first one.
    if reads[-1].suffix() != reads[0].prefix():
        return False
    return True
from Read import Read
from ToOverlapGraphHash import to_overlap_graph

# Input file: one k-mer per line.
with open('/home/user/Downloads/dataset_240256_10(1).txt',
          mode='r',
          encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
# Strip whitespace first, then drop the lines that end up empty.
dnas = [l.strip() for l in lines]
dnas = [l for l in dnas if l]

reads = [Read(kmer) for kmer in dnas]
overlaps = to_overlap_graph(reads)

# Print the adjacency list, leaving out k-mers without outgoing edges.
for kmer, other_kmers in overlaps.get_all_outputs():
    other_kmers = list(other_kmers)
    if len(other_kmers) > 0:
        print(f'{kmer} -> {",".join(str(x) for x in other_kmers)}')
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import enumerate_patterns
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Input file: the first line holds k.
with open('/home/user/Downloads/dataset_240261_11(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k = int(lines[0])

# Build the de Bruijn graph over every pattern enumerate_patterns(k, '01')
# yields, then walk an Eulerian cycle starting from the first node.
reads = [Read(s) for s in enumerate_patterns(k, '01')]
graph = to_debruijn_graph(reads)
start_node = next(graph.get_nodes())
path = walk_eulerian_cycle(graph, start_node)

# Stitch the nodes of the cycle into the resulting string.
k_universal_str = path[0].stitch(path)
print(k_universal_str)
from BalanceNearlyBalancedGraph import balance_graph
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Input file: first line is k, every further line is one k-mer.
with open('/home/user/Downloads/dataset_240261_7(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k = int(lines[0].strip())
# Strip whitespace first, then drop the lines that end up empty.
kmers = [l.strip() for l in lines[1:]]
kmers = [l for l in kmers if l]

reads = [Read(kmer) for kmer in kmers]
graph = to_debruijn_graph(reads)
graph, roots, tails = balance_graph(graph)

path = walk_eulerian_cycle(graph, roots.pop())
# The last connection in the cycle is artificial: it was created by the
# balancing step so that generating this path would be fast. Drop it.
path.pop()
genome = path[0].stitch(path)
print(genome)
Beispiel #18
0
def check_catch_bad_input(test, samline):
    """Build a ``Read`` from ``samline`` and print it.

    The original body referenced ``sam_line``, a name that is not the
    parameter ``samline``; the parameter is used here. ``test`` is not used
    by the visible body.
    """
    # Single-argument print(...) prints the same under Python 2 and 3.
    print(Read(samline))
def eularian_path_to_kmers(cycle_path: List[Read]) -> List[Read]:
    """Build one Read per pair of neighbouring nodes on the path.

    Each Read's data is a node's data extended by the last element of the
    data of the node that follows it. Fewer than two nodes give ``[]``.
    """
    kmers = []
    for idx, following in enumerate(cycle_path[1:]):
        current = cycle_path[idx]
        kmers.append(Read(current.data + following.data[-1]))
    return kmers
Beispiel #20
0
    else:
        continue_loop = False
        exit(0)

# Main menu loop: keeps prompting while ``continue_loop`` is true.
while continue_loop:
    try:
        menu_input = int(input("[1] Add new ToDo file and add your text"
                               "\n[2] Append to an existing file\n""[3] Read from an existing file\n"))
    except ValueError:
        # Non-numeric input used to crash the program; treat it like any
        # other unknown menu choice so the final branch reports it.
        menu_input = None

    if menu_input == 1:
        to_do = Add_to_dos()
        to_do.add_file()
        return_to_main()
    elif menu_input == 2:
        try:
            append = Append()
            append.append_file()
            return_to_main()
        except OSError:
            print("Error!\nThe file was not found")

    elif menu_input == 3:
        try:
            read = Read()
            read.read_file()
        except OSError:
            print("Error!\nThe file was not found")
        return_to_main()
    else:
        print("Error \nCould not find the menu choice in your input")
        return_to_main()
Beispiel #21
0
# Read the xz-compressed input, keeping only non-blank, stripped lines.
reads_filepath = 'FinalChallengeReads.txt.xz'
with lzma.open(reads_filepath, mode='rt', encoding='utf-8') as f:
    lines = [raw.strip() for raw in f.read().splitlines()]
    lines = [text for text in lines if text]

# Each line is split on '|' and unpacked as the two k-mers of a read-pair;
# every Kdmer is built with 1000 as its third argument.
lines_split = [tuple(text.split('|', maxsplit=2)) for text in lines]
kdmers = [Kdmer(k1, k2, 1000) for k1, k2 in lines_split]
rps = [ReadPair(kdmer) for kdmer in kdmers]

# Shatter every read-pair with argument 40 and drop duplicate pieces.
broken_rps = []
for rp in rps:
    broken_rps.extend(rp.shatter(40))
broken_rps = list(set(broken_rps))

graph = to_debruijn_graph(broken_rps)
contig_paths = find_maximal_non_branching_paths(graph)

# Shortest paths first.
contig_paths.sort(key=len)

for path in contig_paths:
    if len(path) >= path[0].d:
        # Path is at least d long: stitch the read-pairs directly.
        out = path[0].stitch(path)
        print(f'{len(path)} kd-mers = {out}')
        continue

    # Otherwise stitch the heads and the tails separately.
    heads = [Read(p.data.head) for p in path]
    heads_out = heads[0].stitch(heads)
    tails = [Read(p.data.tail) for p in path]
    tails_out = tails[0].stitch(tails)
    print(f'{len(heads)} k-mers = {heads_out}')
    print(f'{len(tails)} k-mers = {tails_out}')
Beispiel #22
0
    def create_read(self, dna_seq):
        """Return a ``Read`` built from ``dna_seq`` and generated qualities.

        The nucleotides are ``apply_errors(dna_seq)``; the qualities come
        from ``create_qualities`` called with this object's read length,
        average quality and quality bounds.
        """
        return Read(
            apply_errors(dna_seq),
            create_qualities(self.read_length, self.avg_quality,
                             self.quality_min, self.quality_max),
        )
Beispiel #23
0
def setup():
    """Create fixtures and run the printed self-test of the Feature class.

    Despite its name this function does more than build fixtures. In order:

    1. registers chromosome sizes and chromosome-name conversions;
    2. fills ``good_input`` with expected position strings;
    3. creates features from BED and GFF lines for each feature_count
       method and compares their positions with ``correct_features``;
    4. counts a fixed set of reads on a BED feature for every combination
       of feature method and count method, comparing the printed metagene
       with the expected strings;
    5. checks the handling of unstranded reads;
    6. checks ``adjust_to_metagene`` against expected values.

    Results are reported on stdout as PASSED / **FAILED** lines; nothing is
    returned.

    NOTE(review): ``good_input`` and ``correct_features`` are not defined in
    this function -- presumably module-level dictionaries; confirm.
    """

    # Define chromosome sizes
    Read.extract_chromosome_sizes([
        "@HD\tVN:1.0\tSO:unsorted", "@SQ\tSN:chr1\tLN:300",
        "@SQ\tSN:chr2\tLN:200", "@PG\tID:test\tVN:0.1"
    ])
    Feature.process_set_chromosome_conversion(["1\tchr1", "2\tchr2"])

    # Expected position arrays (as strings), keyed by a test description.
    good_input["bed input counting all of the read"] = (
        "all",
        "[17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]"
    )
    good_input["bed input counting start of the read"] = (
        "start", "[17, 18, 19, 20, 21, 22, 23]")
    good_input["bed input counting end of the read"] = (
        "end", "[36, 37, 38, 39, 40, 41, 42]")
    good_input["gff input counting all of the read"] = (
        "all",
        "[43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8]"
    )
    good_input["gff input counting start of the read"] = (
        "start", "[43, 42, 41, 40, 39, 38, 37]")
    good_input["gff input counting end of the read"] = (
        "end", "[14, 13, 12, 11, 10, 9, 8]")

    # --- Feature creation, once per feature_count method ---
    for method in ['all', 'start', 'end']:
        print "\nTesting feature_count option: ****{}****".format(method)

        if method == 'all':
            metagene = Metagene(10, 4, 2)
            print "\t  with Metagene:\t{}".format(metagene)
            print "\t  with chromosome conversions:\t{}".format(
                Feature.chromosome_conversion)
        else:
            metagene = Metagene(1, 4, 2)
            print "\t  with Metagene:\t{}".format(metagene)
            print "\t  with chromosome conversions:\t{}".format(
                Feature.chromosome_conversion)

        # create feature from BED line
        try:
            bedline = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
                1, 20, 40, "first", 44, "+")
            print "\t  with BED line:\t{}".format(bedline.strip())
            feature1 = Feature.create_from_bed(method, metagene, bedline,
                                               False, False)
            if str(feature1.position_array) != correct_features['bed'][method]:
                print "**FAILED**\t  Create Feature from BED line ?"
                print "\t  Desired positions:\t{}".format(
                    correct_features['bed'][method])
                print "\t  Created positions:\t{}".format(
                    feature1.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from BED line ?"
        else:
            # NOTE: this branch runs whenever no MetageneError was raised,
            # including when the position comparison above printed FAILED.
            print "PASSED\t  Create Feature from BED line ?\t\t{}".format(
                feature1.get_chromosome_region())

        # create feature from GFF line
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 10, 39, ".", "-", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Create Feature from GFF line ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(
                    correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(
                    feature2.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from GFF line ?"
        else:
            print "PASSED\t  Create Feature from GFF line ?\t\t{}".format(
                feature2.get_chromosome_region())

        # create feature from GFF line with start and end swapped
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 39, 10, ".", "-", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Create Feature from GFF line with swapped start and end ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(
                    correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(
                    feature2.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from GFF line with swapped start and end ?"
        else:
            print "PASSED\t  Create Feature from GFF line with swapped start and end ?\t\t{}".format(
                feature2.get_chromosome_region())
        # Same swapped coordinates on the + strand: here a MetageneError is
        # the expected (PASSED) outcome.
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 39, 10, ".", "+", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(
                    correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(
                    feature2.position_array)
        except MetageneError as err:
            print "PASSED\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                err)
        else:
            print "**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                feature2.get_chromosome_region())

        ##TODO finish complete testing of Feature class
    print "\n##TODO finish testing of Feature class creation\n"

    print "\n**** Testing counting and maniputlation ****\n"

    # Expected metagene output: expected[feature method][count method].
    expected = {'all': {}, 'start': {}, 'end': {}}
    #  Positions in metagene:                           17    18     19   20  21-22,23-24,25-26,27-28,29-30,31-32,33-34,35-36,37-38,39-40,  41,   42
    expected['all'] = {
        'all':
        "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000,0.000,0.286,0.571,0.571,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.100,0.100,0.100,0.100,0.100,0.000,0.000,0.000,0.000,0.000,0.111",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000"
    }
    #  Positions in metagene:                           17    18    19    20   [21]   22    23
    expected['start'] = {
        'all':
        "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.050",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.500"
    }
    #  Positions in metagene:                           36    37    38    39   [40]   41    42
    expected['end'] = {
        'all':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.111",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,1.000"
    }

    # From here on ``metagene`` is a dict keyed by feature method (it was a
    # single Metagene object in the loop above).
    metagene = {
        'all': Metagene(10, 4, 2),
        'start': Metagene(1, 4, 2),
        'end': Metagene(1, 4, 2)
    }

    # --- Read counting, once per feature method ---
    for method in ['all', 'start', 'end']:
        if method == 'all':
            print "\t  with Metagene:\t{}".format(metagene[method])
            print "\t  with chromosome conversions:\t{}".format(
                Feature.chromosome_conversion)
        else:
            print "\t  with Metagene:\t{}".format(metagene[method])
            print "\t  with chromosome conversions:\t{}".format(
                Feature.chromosome_conversion)

        print "\nTesting feature_count option: ****{}****".format(method)
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
            1, 20, 40, "first", 44, "+")
        feature1 = Feature.create_from_bed(method, metagene[method],
                                           feature_line, False, False)
        print "\tFeature:\t{}".format(feature1.position_array)

        # Fixed set of reads counted against the feature.
        reads = []
        reads.append(
            Read("chr1", "+", 3, 1, [10, 11, 12, 13, 14, 15, 16, 17, 18]))
        reads.append(
            Read("chr1", "-", 1, 2, [23, 24, 25, 26, 27, 28, 29, 30, 31, 32]))
        reads.append(Read("chr1", "+", 4, 2, [30, 31, 32, 33, 34, 40, 41]))
        reads.append(
            Read("chr1", "-", 1, 1, [42, 43, 44, 45, 46, 47, 48, 49, 50]))

        reads.append(Read("chr1", "+", 10, 1, [51, 52, 53, 54, 55]))
        reads.append(Read("chr2", "+", 10, 1,
                          [18, 19, 20, 21, 22, 23, 24, 25]))

        # starting count
        for count_method in ['all', 'start', 'end']:
            print "\nTesting count_method option: ****{}****".format(
                count_method)

            # ``output`` is a running log that is only printed on failure.
            output = "{}\n".format(feature1)

            for r in reads:
                output += "{}\n".format(r)
                feature1.count_read(r, count_method, count_partial_reads=True)
                output += "{}\n".format(feature1)

            output += feature1.print_metagene(pretty=True)
            if str(feature1.print_metagene()).strip() == str(
                    expected[method][count_method]).strip():
                print "PASSED\tCreated correct metagene with feature method {} and count method {} ?".format(
                    method, count_method)
            else:
                print "**FAILED**\tCreated correct metagene with feature method {} and count method {} ?".format(
                    method, count_method)
                print "\tExpected:\n{}".format(expected[method][count_method])
                print "\tActual  :\n{}".format(feature1.print_metagene())
                print "\tSummary of run:\n{}".format(output)
            feature1 = Feature.create_from_bed(
                method, metagene[method], feature_line, False,
                False)  # zero out counter for next round

    # --- Unstranded read handling ---
    # ``feature1`` and ``method`` below are left over from the last iteration
    # of the loop above (method == 'end').
    try:
        unstranded_read = Read("chr1", ".", 10, 1,
                               [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        # NOTE: the string has no {} placeholder, so err is not printed.
        print "PASSED\tCaught unstranded read on stranded count ?\t\t".format(
            err)
    else:
        print "**FAILED**\tCaught unstranded read on stranded count ?"

    try:
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
            1, 20, 40, "first", 44, ".")
        feature1 = Feature.create_from_bed(method, metagene[method],
                                           feature_line, False, False)
        unstranded_read = Read("chr1", ".", 10, 1,
                               [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        # NOTE: the string has no {} placeholder, so err is not printed.
        print "**FAILED**\tAllowed unstranded read on unstranded count ?\t\t".format(
            err)
    else:
        print "PASSED\tAllowed unstranded read on unstranded count ?"

    print "\n**** Testing adjust_to_metagene ****\n"

    # NOTE: assigned but not used anywhere in the rest of this function.
    chromosome_converter = {"1": "chr1", "2": "chr2"}

    # ((metagene_tuple), (feature_tuple), expected_result_string, message_string)
    tests = [((8, 2, 2), (16, 8, 24, 4),
              '8.000,8.000,4.000,4.000,12.000,12.000,2.000,2.000',
              "Expand to metagene ?"),
             ((4, 2, 2), (6, 8, 6, 2, 4, 4, 2, 4, 24, 8),
              '17.000,9.000,8.000,34.000', "Contract to metagene ?"),
             ((4, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4),
              '5.500,9.333,17.825,9.475', "Contract with messy floats ?"),
             ((3, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4),
              '7.611,19.556,14.967', "Contract with other messy floats ?")]

    for t in tests:
        metagene = Metagene(*t[0])
        print "\t{}".format(metagene)
        feature_line = "{}\t{}\t{}\n".format(1, 0, len(t[1]))
        feature = Feature.create_from_bed('all',
                                          metagene,
                                          feature_line,
                                          False,
                                          False,
                                          short=True)
        # Format the adjusted values as "x.xxx," items; the trailing comma is
        # sliced off before the comparison.
        adjusted_feature = ""
        for f in feature.adjust_to_metagene(t[1]):
            adjusted_feature += "{0:0.3f},".format(f)
        if adjusted_feature[:-1] == t[2]:
            print "PASSED\t{}".format(t[3])
        else:
            print "**FAILED**\t{}".format(t[3])
            print "\tExpected:\t{}".format(t[2])
            print "\tActual  :\t{}".format(adjusted_feature[:-1])
            print "\tOriginal:\t{}".format(feature.adjust_to_metagene(t[1]))

    print "\n**** End of Testing the Feature class ****\n"


# end of Feature.test method
Beispiel #24
0

def onResult(data):
    """Print the received PLC time and return it unchanged."""
    print "PLC Time is", data
    return data


# Report the PLC time once ``d`` (created earlier in the file) delivers it.
d.addCallback(onResult)
#plcTime.set(None) # Set PLC to current server time

# Start listening as HTTP server: static files come from the "www"
# directory, and each putChild call registers one named child resource.
root = File("www")
#root.putChild("membrane_insert", Membrane(plc))
root.putChild("events", EventSource(plc))
root.putChild("write", Write(plc))
root.putChild("read", Read(plc))
root.putChild("logger", Logger(plc))
root.putChild("membrane", Membrane())
root.putChild("product", Product())
root.putChild("chemical", Chemical())
root.putChild("bag-filter", BagFilter())

# Serve the resource tree on TCP port 8000 and run the reactor.
factory = Site(root)
reactor.listenTCP(8000, factory)
reactor.run()
# Exit here once reactor.run() returns, so the test code below never runs.
exit()

# /////////////////////////
# What follows is test code

# Read log value