Example #1
0
  def __init__(self, read, mate, reflengths, refnames, splitparts):
    """
    Wrap both alignments in Read objects and classify the pair.

    The pair ends up with one of the Paired.rtype values FILTERED, SINGLE,
    READ_SPLIT, MATE_SPLIT or NORMAL. The checks below are evaluated in
    order and the first one that matches decides the type, so their order
    is significant.

    read       -- alignment record; its qname is stored as the pair name
    mate       -- second alignment record; tested for truthiness before the
                  ordering check, so a falsy value is accepted here
    reflengths -- stored unchanged on the instance
    refnames   -- forwarded to both Read constructors
    splitparts -- forwarded to SplitRead when one side is a split read
    """
    # Order the two alignments: if Paired.isFirst(mate, read) holds, the
    # mate takes the "read" role and the read takes the "mate" role.
    if mate and Paired.isFirst(mate, read):
      self.__read = Read(mate, True, Paired._readStrand, refnames)
      self.__mate = Read(read, False, Paired._mateStrand, refnames)
    else:
      self.__read = Read(read, True, Paired._readStrand, refnames)
      self.__mate = Read(mate, False, Paired._mateStrand, refnames)

    # The name always comes from the original `read` argument, even when the
    # two alignments were swapped above.
    self.__qname = read.qname
    self.__reflengths = reflengths
    self.__splitpair = None

    if not self.__read.hasMinQuality(): # read doesn't have minimal mapping quality
      if self.__mate.isUnmapped() or not self.__mate.hasMinQuality(): # both don't have minimal mapping quality
        self.__type = Paired.rtype.FILTERED
      else: # make mate single
        self.__read = self.__mate
        self.__mate = None
        self.__type = Paired.rtype.SINGLE
    elif self.__mate.isUnmapped(): # mate unmapped
      # Unlike the other SINGLE cases, the (unmapped) mate object is kept.
      self.__type = Paired.rtype.SINGLE
    elif not self.__mate.hasMinQuality(): # mate doesn't have minimal mapping quality
      self.__type = Paired.rtype.SINGLE
      self.__mate = None
    elif self.__read.isDuplicate(): # read is duplicate
      if self.__mate.isDuplicate(): # both are duplicated -> filter out
        self.__type = Paired.rtype.FILTERED
      else: # only read is duplicate -> make mate single
        self.__type = Paired.rtype.SINGLE
        self.__read = self.__mate
        self.__mate = None
    elif self.__mate.isDuplicate(): # mate is duplicate -> make read single
      self.__type = Paired.rtype.SINGLE
      self.__mate = None
    elif self.__read.isSplit(): # read is split
      if self.__mate.isInverted() or self.__mate.hasGaps() or self.isInterchromosomal(): # filter both
        self.__type = Paired.rtype.FILTERED
      else: # split read
        self.__type = Paired.rtype.READ_SPLIT
        self.__splitpair = SplitPair(False, self.__mate, SplitRead(self.__read, Paired._readStrand, splitparts))
    elif self.__mate.isSplit(): # mate is split
      if self.__read.isInverted() or self.__read.hasGaps() or self.isInterchromosomal(): # filter both
        self.__type = Paired.rtype.FILTERED
      else: # split mate
        self.__type = Paired.rtype.MATE_SPLIT
        self.__splitpair = SplitPair(True, self.__read, SplitRead(self.__mate, Paired._mateStrand, splitparts))
    elif not self.__read.hasGaps() and not self.__mate.hasGaps(): # paired without any gaps
      self.__type = Paired.rtype.NORMAL
    else:
      # Neither side is split but at least one of them has gaps.
      self.__type = Paired.rtype.FILTERED

    # A split pair is only kept when the split read passes the quality and
    # length checks and the pair reports a positive actual size.
    if self.__splitpair and (not self.__splitpair.splitread.hasMinQuality() or not self.__splitpair.splitread.hasMinLengths() or self.actualSize() <= 0):
      self.__type = Paired.rtype.FILTERED
def check_create_read(test, values):
    """Build a read from a SAM line and compare its description to the expected one.

    ``values`` is indexed positionally: 0 is compared against 4 to detect a
    non-aligning read, 1 is the chromosome, 2 the start position, 4 the
    length, 5 the abundance, 6 the number of mappings and 7 the strand.
    Every element except the last is forwarded to ``build_samline``.

    Raises AssertionError with a multi-line report when the first
    tab-separated field of ``str(read)`` differs from the expected text.
    """
    # create expected result
    if int(values[0]) == 4:
        expected = "Non-aligning read"
    else:
        leftmost = int(values[2])
        rightmost = leftmost + int(values[4]) - 1
        # On the minus strand the read is reported from its right end.
        if values[7] == "-":
            first, last = rightmost, leftmost
        else:
            first, last = leftmost, rightmost
        expected = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:".format(
            values[1],  # chromosome
            first,
            last,
            values[7],  # strand
            float(values[5]) / float(values[6]))  # abundance / mappings

    # build input to test
    samline = build_samline(*values[0:-1])  # exclude final value
    (_created, read) = Read.create_from_sam(
        samline, chromosome_conversion.values(), count_method='all')
    output = str(read).split("\t")[0]

    # create description in case test fails
    test_description = "".join([
        "\nTest:    \t{}\n".format(test),
        "Abundance:\t{}\n".format(Read.has_sam_tag["NA"]),
        "Mappings:\t{}\n".format(Read.has_sam_tag["NH"]),
        "Sam Line:\t{}\n".format(samline),
        "Expected:\t{}\n".format(expected),
        "Position:\t{}\n".format(output),
    ])
    assert output == expected, "{}Error:   \tDid not create expected read.".format(test_description)
def main():
    """Print the overlap graph of the fragments supplied on stdin.

    The first non-blank line of stdin is the command (``reads`` or
    ``read-pairs``); every following non-blank line is one fragment.
    Repeated fragments are kept apart by passing their occurrence index as
    the second constructor argument. A read-pair line is split on ``|``:
    fields 0 and 2 become the first two ``Kdmer`` arguments and field 1,
    converted to ``int``, the third.

    The output is wrapped in a bordered ``<div>`` between
    ``bm-disable-all`` / ``bm-enable-all`` markers; the closing markers are
    printed even when an error occurs.

    Raises:
        IndexError: if stdin contains no non-blank line.
        ValueError: if the command is not ``reads`` or ``read-pairs``
            (previously a bare ``raise`` that surfaced as an unrelated
            "No active exception to reraise" RuntimeError).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                # Split once per distinct line instead of three times per copy.
                parts = r.split('|')
                head, tail, gap = parts[0], parts[2], int(parts[1])
                for i in range(c):
                    frags.append(ReadPair(Kdmer(head, tail, gap), i))
        else:
            raise ValueError(
                f"Unknown command {command!r}; expected 'reads' or 'read-pairs'")
        graph = to_overlap_graph(frags)
        print(f'Given the fragments {lines}, the overlap graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
def main():
    """Report how often each sequenced fragment probably occurs in the genome.

    Every non-blank stdin line is split on whitespace; the first field is
    the fragment and the second its scan count. One ``Read`` is created per
    line and repeated ``count`` times before the list is handed to
    ``calculate_fragment_occurrence_probabilities``.

    The output is wrapped in a bordered ``<div>`` between
    ``bm-disable-all`` / ``bm-enable-all`` markers; the closing markers are
    printed even when an error occurs.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        entries = []
        while True:
            try:
                text = input().strip()
            except EOFError:
                break
            if text:
                entries.append(text)

        frag_to_count = [entry.split() for entry in entries]
        print('Sequenced fragments:', end='\n\n')
        for fragment in frag_to_count:
            print(f' * {fragment[0]} was scanned in {fragment[1]} times.')

        # The same Read instance is repeated once per scan of its fragment.
        raw_reads = []
        for fragment in frag_to_count:
            raw_reads.extend([Read(fragment[0])] * int(fragment[1]))

        occurrence_probabilities = calculate_fragment_occurrence_probabilities(raw_reads)
        print('', end='\n\n')
        print('Probability of occurrence in genome:', end='\n\n')
        for read, appearances in occurrence_probabilities.items():
            print(f' * {read} probably has {appearances} appearances in the genome.')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Example #5
0
def main():
    """Print the de Bruijn graph of the stdin fragments and one Eulerian cycle.

    The first non-blank line of stdin is the command (``reads`` or
    ``read-pairs``); every following non-blank line is one fragment.
    Repeated fragments are kept apart by passing their occurrence index as
    the second constructor argument. A read-pair line is split on ``|``:
    fields 0 and 2 become the first two ``Kdmer`` arguments and field 1,
    converted to ``int``, the third. The cycle walk starts at the prefix of
    the first fragment.

    The output is wrapped in a bordered ``<div>`` between
    ``bm-disable-all`` / ``bm-enable-all`` markers; the closing markers are
    printed even when an error occurs.

    Raises:
        IndexError: if stdin contains no non-blank line, or no fragment
            follows the command.
        ValueError: if the command is not ``reads`` or ``read-pairs``
            (previously a bare ``raise`` that surfaced as an unrelated
            "No active exception to reraise" RuntimeError).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                # Split once per distinct line instead of three times per copy.
                parts = r.split('|')
                head, tail, gap = parts[0], parts[2], int(parts[1])
                for i in range(c):
                    frags.append(ReadPair(Kdmer(head, tail, gap), i))
        else:
            raise ValueError(
                f"Unknown command {command!r}; expected 'reads' or 'read-pairs'")
        graph = to_debruijn_graph(frags)
        print(f'Given the fragments {lines}, the de Bruijn graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print('... and a Eulerian cycle is ...', end="\n\n")
        path = walk_eulerian_cycle(graph, frags[0].prefix())
        print(f'{" -> ".join([str(p) for p in path])}')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
class DataOperations():
    """This class performs nlp analysis on data."""

    def __init__(self, abs_path=""):
        """Remember the base path and create the helper objects.

        :param abs_path:
            default directory used by the other methods when they are
            called without an explicit ``abs_path``; it is also passed to
            ``Matrix.CreateMatrix``.
        """
        self.abs_path = abs_path
        self.matrix = Matrix.CreateMatrix(abs_path=abs_path)
        self.ner = NerExtraction.NerExtraction()
        self.gmd = Read()

    def saveMatrix(self, words=None, abs_path="", filename="", min=0.001, max=10.0):
        """Generates and saves matrix file from wordlist dictionary of word and occurrences.

        :param words:
            holds "word":445, of all words in database,  dict. Defaults to
            a fresh empty dict on every call (a shared ``{}`` default would
            leak state between calls if the matrix ever mutated it).
        :param abs_path:
            target directory; falls back to ``self.abs_path`` when empty.
        :param filename:
            forwarded to ``self.matrix.saveMatrix``.
        :param min:
            forwarded to ``self.matrix.saveMatrix``.
        :param max:
            forwarded to ``self.matrix.saveMatrix``.
        :returns:
            whatever ``self.matrix.saveMatrix`` returns.
        """
        if words is None:
            words = {}
        if not abs_path:
            abs_path = self.abs_path
        # testate is dict of newspaper title and associated a dict with words and their frequency
        testate = self.gmd.getMostCommonWordsBySite(respType="dict", include_percentage=False)
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("Generating and saving matrix, calling Matrix()...")
        wordlist = self.matrix.saveMatrix(words, testate, abs_path, filename=filename, min=min, max=max)
        return wordlist

    def hierarchical_clustering(self, abs_path="", filename=""):
        """Creates clusters and dendrogram.

        :param abs_path:
            directory holding the matrix file; falls back to
            ``self.abs_path`` when empty.
        :param filename:
            matrix file name inside ``abs_path``; the dendrogram image is
            requested as ``filename + ".jpg"``.
        :returns:
            whatever ``self.matrix.draw_dendrogram`` returns.
        """
        if not abs_path:
            abs_path = self.abs_path
        fn = abs_path + "/" + filename
        print("Definig clusters for %s..." % fn)
        dataDict = self.matrix.hierarchical_clustering(filename=fn)
        print("Drawing the dendrogram...")
        imgFile = self.matrix.draw_dendrogram(dataDict['clusters'], dataDict['titles'], jpeg=filename + ".jpg")
        return imgFile
Example #7
0
def check_create_read(test, values):
    """Create a read from a generated SAM line and verify its summary text.

    ``values`` is read by position: 0 is compared against 4 (non-aligning
    read), 1 chromosome, 2 start position, 4 length, 5 abundance,
    6 mappings, 7 strand. All but the final element are passed on to
    ``build_samline``.

    Fails with an AssertionError carrying a multi-line report when the
    first tab-separated field of ``str(read)`` is not the expected text.
    """
    # create expected result
    non_aligning = int(values[0]) == 4
    if non_aligning:
        expected = "Non-aligning read"
    else:
        start = int(values[2])
        end = int(values[2]) + int(values[4]) - 1
        if values[7] == "-":
            # Minus-strand reads are described from their right end.
            start, end = end, int(values[2])
        template = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:"
        expected = template.format(
            values[1],  # chromosome
            start,
            end,
            values[7],  # strand
            float(values[5]) / float(values[6]))  # abundance / mappings

    # build input to test
    sam_fields = values[0:-1]  # exclude final value
    samline = build_samline(*sam_fields)
    created_and_read = Read.create_from_sam(samline,
                                            chromosome_conversion.values(),
                                            count_method='all')
    (created, read) = created_and_read
    output = str(read).split("\t")[0]

    # create description in case test fails
    report_template = ("\nTest:    \t{}\n"
                       "Abundance:\t{}\n"
                       "Mappings:\t{}\n"
                       "Sam Line:\t{}\n"
                       "Expected:\t{}\n"
                       "Position:\t{}\n")
    test_description = report_template.format(test,
                                              Read.has_sam_tag["NA"],
                                              Read.has_sam_tag["NH"],
                                              samline,
                                              expected,
                                              output)
    assert output == expected, "{}Error:   \tDid not create expected read.".format(
        test_description)
Example #8
0
    def read(self):
        """Match the grammar for a read and return the resulting Read node.

        Consumes the READ token, pushes "Designator" on the parse stack and
        parses a designator. If the designator is not a Location, or its
        type is not this parser's integer type, an InvalidRead error is
        reported through ``invalid_token``. The Read node is tagged with
        the last matched token.
        """
        self.match("READ")

        self.stack.append("Designator")
        target = self.designator()

        not_integer_variable = (not isinstance(target, Location)
                                or target.type != self.integer_type)
        if not_integer_variable:
            # The error is raised and caught on the spot so that it reaches
            # invalid_token as a live exception.
            try:
                raise InvalidRead(target.type, self.last)
            except InvalidRead as error:
                self.invalid_token(error)

        node = Read(target)
        node.token = self.last
        return node
def sam2graph(sam):
    """Convert a BAM / SAM file into an alignment set dictionary.

    Returns a dict mapping each reference name to the set of ``Align``
    objects built from the records aligned to it. One ``Read`` is created
    per reference name and reused for all of its alignments; its sequence
    is looked up in the module-level ``fastaDict``.
    """
    references = {}
    graph = {}
    with pysam.AlignmentFile(sam) as samfile:
        for record in samfile.fetch():
            # Read and Reference data
            query = Read(record.query_name)
            query.sequence = record.query_sequence

            ref_name = record.reference_name

            if ref_name in references:
                ref = references[ref_name]
            else:
                # First alignment on this reference: create and register it.
                ref = Read(ref_name)
                ref.sequence = fastaDict[ref_name]
                references[ref_name] = ref
                graph[ref_name] = set()

            # Alignment data
            graph[ref_name].add(Align(
                query,
                ref,
                record.query_alignment_start,
                record.query_alignment_end,
                record.reference_start,
                record.reference_end,
                record.cigarstring,
                "-" if record.is_reverse else "+",
            ))
    return graph
class GeneralDataView(object):
    """This class formulates the response to the web query."""

    def __init__(self):
        """Create the Read object that every view queries for its data."""
        self.dm = Read()

    def trends(self, request):
        """Get trends for politics organizations etc...

        Renders 'trends.html'. The template context is ``locals()``, so
        every local name defined below (including ``self`` and ``request``)
        is visible to the template; renaming or removing a local changes
        what the template receives.
        """
        filter = "Politici"
        fil_name = "Politic"
        tipo = "PERSON"
        # Entities of type PERSON restricted by fil_name, highest value first.
        nomi = sorted(self.dm.getEntities(tipo = tipo, filter = fil_name).items(), key = lambda x:x[1], reverse = True)
        sys.stderr.write("Fetched: %s" % (str(nomi)))
        sys.stderr.flush()
        
        # Same entity type, this time without the name filter.
        tutti = self.dm.getEntities(tipo = tipo)
        return render_to_response('trends.html', locals(), RequestContext(request))


    def fetch(self, request):
        """Collect overall statistics and render 'statistics.html'.

        As in ``trends``, the template context is ``locals()``, so the
        local names below are part of the contract with the template.
        """
        newspapers = self.dm.getTotalArticles()
        topused = self.dm.getMostCommonWordsTotal()
        total_words = self.dm.getTotalWordsInt()
        giornali = self.dm.getMostCommonWordsBySite()
        # Debug dump of the per-site word statistics to stdout.
        pprint.pprint(giornali)
        #for giornale in giornali:
        #    sys.stderr.write("Giornali:\n %s\n" % (giornale['testata']))
        sys.stderr.flush()

        sys.stderr.write("Fetched: %s" % (str(newspapers)))
        sys.stderr.flush()
        return render_to_response('statistics.html', locals(), RequestContext(request))
Example #11
0
 def create_string(self, min_size, max_size, min_distance, max_distance,
                   error, overlap_chance):
     """Cut random reads out of H1 / H2 and append them to ``self.reads``.

     Starting at position 0 and while the position is below ``self.size``:
     a read length is drawn from [min_size, max_size], the slice is taken
     from H1 or H2 with equal probability, and a non-empty slice becomes a
     ``Read(position, slice, error)``. The position only advances when a
     random draw exceeds ``overlap_chance``; the step is a random distance
     from [min_distance, max_distance] capped at ``length - 1``.
     """
     position = 0
     while position < self.size:
         length = random.randint(min_size, max_size)
         source = self.H1 if random.random() < 0.5 else self.H2
         chunk = source[position:position + length]
         if len(chunk) > 0:
             # error will be 0 for now
             self.reads.append(Read(position, chunk, error))
         if random.random() > overlap_chance:
             step = random.randint(min_distance, max_distance)
             position += min(step, length - 1)
Example #12
0
    def append_overlap(self: ReadPair, other: ReadPair, skip: int = 1) -> ReadPair:
        """Merge this read-pair with ``other`` by overlapping heads and tails.

        The heads and the tails are each wrapped in a ``Read``, combined
        with ``Read.append_overlap`` and unwrapped again. The gap of the
        result is ``self.d - skip``. The returned pair records both inputs
        as its ``('overlap', [self, other])`` source.
        """
        merged_head = Read(self.data.head).append_overlap(Read(other.data.head)).data
        merged_tail = Read(self.data.tail).append_overlap(Read(other.data.tail)).data

        # WARNING: the gap may go negative. A negative gap means that head
        # and tail OVERLAP instead of being separated; to remove the overlap,
        # drop either the last d chars of the head or the first d chars of
        # the tail.
        merged = Kdmer(merged_head, merged_tail, self.d - skip)

        return ReadPair(merged, source=('overlap', [self, other]))
def main():
    """Print the overlap graph of the stdin fragments and its Hamiltonian paths.

    The first non-blank line of stdin is the command (``reads`` or
    ``read-pairs``); every following non-blank line is one fragment.
    Repeated fragments are kept apart by passing their occurrence index as
    the second constructor argument. A read-pair line is split on ``|``:
    fields 0 and 2 become the first two ``Kdmer`` arguments and field 1,
    converted to ``int``, the third. Paths are collected from every node of
    the graph and de-duplicated before printing.

    The output is wrapped in a bordered ``<div>`` between
    ``bm-disable-all`` / ``bm-enable-all`` markers; the closing markers are
    printed even when an error occurs.

    Raises:
        IndexError: if stdin contains no non-blank line.
        ValueError: if the command is not ``reads`` or ``read-pairs``
            (previously a bare ``raise`` that surfaced as an unrelated
            "No active exception to reraise" RuntimeError).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                # Split once per distinct line instead of three times per copy.
                parts = r.split('|')
                head, tail, gap = parts[0], parts[2], int(parts[1])
                for i in range(c):
                    frags.append(ReadPair(Kdmer(head, tail, gap), i))
        else:
            raise ValueError(
                f"Unknown command {command!r}; expected 'reads' or 'read-pairs'")
        graph = to_overlap_graph(frags)
        print(f'Given the fragments {lines}, the overlap graph is...',
              end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```', end="\n\n")
        print('... and the Hamiltonian paths are ...', end="\n\n")
        # Tuples make the paths hashable so duplicates collapse in the set.
        all_paths = {
            tuple(path) for node in graph.get_nodes()
            for path in walk_hamiltonian_paths(graph, node)
        }
        for path in all_paths:
            print(f' * {" -> ".join([str(p) for p in path])}')

    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Example #14
0
def sam2graph(sam, fasta):
    """Convert a BAM / SAM file into an alignment set dictionary.

    Records whose flag has bit 4 set are skipped. For the others, one
    ``Read`` per query name and one per reference name is created and
    reused. Returns a dict mapping each reference name to the set of
    ``Align`` objects built from the records aligned to it.

    If a reference name cannot be resolved through ``fasta.transcriptMap``
    / ``fasta.genes``, the name is printed and ``exit()`` is called.
    """
    references = {}
    queries = {}
    graph = {}
    with pysam.AlignmentFile(sam) as samfile:
        for record in samfile.fetch():
            if record.flag & 4:
                continue

            # Fetching read and reference names
            query_name = record.query_name
            ref_name = record.reference_name

            # Checking existence and creating / fetching objects
            if query_name in queries:
                query = queries[query_name]
            else:
                query = Read(record.query_name)
                queries[query_name] = query

            # Since one read can be mapped on multiple transcripts,
            # sequences are only stored on "prefered" mapping.
            # Need to account for this when filling target objects
            if record.query_sequence:
                query.sequence = record.query_sequence

            if ref_name in references:
                ref = references[ref_name]
            else:
                ref = Read(ref_name)
                try:
                    gene = fasta.genes[fasta.transcriptMap[ref_name]]
                except KeyError:
                    print(ref_name)
                    exit()
                ref.sequence = gene.transcripts[ref_name]
                references[ref_name] = ref
                graph[ref_name] = set()

            # Alignment data
            graph[ref_name].add(Align(
                query,
                ref,
                record.query_alignment_start,
                record.query_alignment_end,
                record.reference_start,
                record.reference_end,
                record.cigarstring,
                "-" if record.is_reverse else "+",
            ))
    return graph
Example #15
0
def bamToHap(fasta, bim, bam, quality_threshold=14):
    """Print one tab-separated haplotype line per processable read in ``bam``.

    Each element of ``bam`` is wrapped as ``Read(x, fasta)`` and passed to
    ``process_read`` together with ``bim`` and ``quality_threshold``. Reads
    for which ``process_read`` raises are logged as warnings on the
    ``bamToHap`` logger and skipped; all other reads are printed to stdout.

    Args:
        fasta: passed to the ``Read`` constructor for every record.
        bim: passed to ``process_read``.
        bam: iterable of alignment records.
        quality_threshold: passed to ``process_read``.
    """
    logger_bamToHap = logging.getLogger('bamToHap')
    logger_bamToHap.info(
        'bamToHap(fasta={}, bim={}, bam={}, quality_threshold={})'.format(
            fasta, bim, bam, quality_threshold))
    for x in bam:
        r = Read(x, fasta)
        try:
            read_bim_l, read_bim_r, read_error_rate, read_hap_str = process_read(
                r, bim, quality_threshold)
        except KeyError as e:
            # Logger has no ``WARNING`` method (that name is the level
            # constant on the module); the old call raised AttributeError
            # instead of logging.
            logger_bamToHap.warning('%s KeyError %s', r.query_name, e)
        except Exception as e:
            # Deliberately broad: one bad read must not abort the whole run.
            logger_bamToHap.warning('%s %s %s', r.query_name, type(e), e)
        else:
            # chr, start, end, name, MAPQ, error rate, bim_l, bim_r, hapstr
            read_output = (r.reference_name, r.l.get_pos(), r.r.get_pos(),
                           r.query_name, r.mapping_quality, read_error_rate,
                           read_bim_l, read_bim_r, read_hap_str)
            print('\t'.join([str(field) for field in read_output]))
Example #16
0
def main():
    """Print the artificially balanced de Bruijn graph of the stdin fragments.

    The first non-blank line of stdin is the command (``reads`` or
    ``read-pairs``); every following non-blank line is one fragment.
    Repeated fragments are kept apart by passing their occurrence index as
    the second constructor argument. A read-pair line is split on ``|``:
    fields 0 and 2 become the first two ``Kdmer`` arguments and field 1,
    converted to ``int``, the third. The graph is passed through
    ``balance_graph`` and the head and tail nodes it returns are printed
    after the graph.

    The output is wrapped in a bordered ``<div>`` between
    ``bm-disable-all`` / ``bm-enable-all`` markers; the closing markers are
    printed even when an error occurs.

    Raises:
        IndexError: if stdin contains no non-blank line.
        ValueError: if the command is not ``reads`` or ``read-pairs``
            (previously a bare ``raise`` that surfaced as an unrelated
            "No active exception to reraise" RuntimeError).
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                # Split once per distinct line instead of three times per copy.
                parts = r.split('|')
                head, tail, gap = parts[0], parts[2], int(parts[1])
                for i in range(c):
                    frags.append(ReadPair(Kdmer(head, tail, gap), i))
        else:
            raise ValueError(
                f"Unknown command {command!r}; expected 'reads' or 'read-pairs'")
        graph = to_debruijn_graph(frags)
        graph, head_nodes, tail_nodes = balance_graph(graph)
        print(
            f'Given the fragments {lines}, the artificially balanced de Bruijn graph is...',
            end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print(
            f'... with original head nodes at {head_nodes} and tail nodes at {tail_nodes}.'
        )
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Example #17
0
#!/usr/bin/env python
# -*- coding: utf8 -*-
from Read import Read
from Attendance import Attendance
from Email import Email
import sys

# Command-line dispatch: the first argument selects the action.
#   --register / --read  start a Read in mode 0 / 1
#   --offline            mark offline attendance
#   --email              send the e-mail
#   --help               print manual.txt
# Single-argument print(...) calls are used so the script parses on both
# Python 2 and Python 3.
if len(sys.argv) > 1:
    PASSED_ARG = sys.argv[1]
    MODE = None

    if PASSED_ARG == "--register":
        MODE = 0
        Read(MODE).start()
    elif PASSED_ARG == "--read":
        MODE = 1
        Read(MODE).start()
    elif PASSED_ARG == "--offline":
        Attendance().markOfflineAttendance()
    elif PASSED_ARG == "--email":
        Email().send()
    elif PASSED_ARG == "--help":
        # Close the manual explicitly instead of leaking the file handle.
        with open("manual.txt", "r") as manual:
            print(manual.read())
    else:
        print("app: invalid option -- " + PASSED_ARG)
        print("Try python app.py --help for more information.")
else:
    print("Try python app.py --help for more information.")
Example #18
0
def show_read():
    """Reset the content frame and display the Read data inside it.

    The current ``content.frameContent`` is un-packed, ``content()`` is
    called, and a new ``Read`` then shows the result of its ``get()`` in
    ``content.frameContent``.
    """
    content.frameContent.pack_forget()
    content()
    reader = Read()
    entries = reader.get()
    reader.show(entries, content.frameContent)
from Read import Read

# Input layout: line 1 holds k, line 2 holds the text to decompose.
with open('/home/user/Downloads/dataset_240255_3(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k, text = int(lines[0]), lines[1]

# One output line per element returned by Read.from_string.
composition = Read.from_string(text, k)
print('\n'.join(map(str, composition)))
from typing import List

from Graph import Graph
from Read import Read
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Fixed example graph over the four 2-character binary strings. Every edge
# leads to a node whose first character equals the source's last character.
graph = Graph()
for _source, _target in (
        ('00', '00'), ('00', '01'),
        ('01', '10'), ('01', '11'),
        ('10', '00'), ('10', '01'),
        ('11', '10'), ('11', '11'),
):
    graph.insert_edge(Read(_source), Read(_target))


def eularian_path_to_kmers(cycle_path: List[Read]) -> List[Read]:
    """Turn each consecutive node pair of a path into one k-mer read.

    The k-mer for a pair is the first node's data followed by the last
    character of the second node's data. A path of n nodes yields n - 1
    reads; paths with fewer than two nodes yield an empty list.
    """
    return [
        Read(current.data + following.data[-1])
        for current, following in zip(cycle_path, cycle_path[1:])
    ]


def do_kmers_cycle(reads: List[Read]) -> bool:
    """Return True when the reads chain into a closed cycle.

    Every read's suffix must equal the next read's prefix, and the last
    read's suffix must equal the first read's prefix.

    The function used to fall off the end on success and return ``None``
    despite its ``bool`` return type, so it could never report a cycle.

    Raises:
        IndexError: if ``reads`` is empty.
    """
    for current, following in zip(reads, reads[1:]):
        if current.suffix() != following.prefix():
            return False
    # Close the cycle: the last read has to lead back to the first one.
    if reads[-1].suffix() != reads[0].prefix():
        return False
    return True
def eularian_path_to_kmers(cycle_path: List[Read]) -> List[Read]:
    """Convert a node path into the k-mers spelled by its edges.

    For every pair of neighbouring nodes the k-mer is the data of the
    first node plus the final character of the second node's data. The
    result has one read fewer than the path has nodes.
    """
    kmers = []
    edge_count = len(cycle_path) - 1
    position = 0
    while position < edge_count:
        source = cycle_path[position]
        target = cycle_path[position + 1]
        kmers.append(Read(source.data + target.data[-1]))
        position += 1
    return kmers
Example #22
0
 def add_read():
     """Create a Read and call its ``add`` with the enclosing object's id."""
     Read().add(self.id)
Example #23
0
              "NA:i:4\tNH:i:4",
              "NA:i:4\tNH:i:4",
              "NA:i:4\tNH:i:4",
              "NA:i:4\tNH:i:4"]

    Read.process_set_sam_tag(sample, count_tag=True, tag_regex='NA:i:(\d+)')
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex='NH:i:(\d+)')


def test_build_positions():
    """Yield one ``check_build_positions`` case per entry of ``cigar_string``."""
    for name, case in cigar_string.items():
        yield (check_build_positions, name, case)


def check_build_positions(test, (values, expected)):
    position_array = Read.build_positions(*values)
    test_description = "\nTest:    \t{}\n".format(test)
    test_description += "Expected:\t{}\n".format(expected)
    test_description += "Position:\t{}\n".format(position_array)
    assert position_array == expected, "{}Error:   \tDid not create the expected position array.".format(
        test_description)


def test_catch_bad_cigar_input():
    """Yield one ``check_catch_bad_cigar_input`` case per entry of ``bad_cigar_string``."""
    for name in bad_cigar_string:
        case = bad_cigar_string[name]
        yield (check_catch_bad_cigar_input, name, case)


@raises(MetageneError)
def check_catch_bad_cigar_input(test, (values, expected)):
    print Read.build_positions(*values)
def metagene_count():
    """Chain of command for metagene_count analysis.

    Steps, in order:

    1. Parse the arguments and configure the ``Read`` and ``Feature``
       classes (chromosome sizes, chromosome-name conversion, SAM tags).
    2. Build the ``Metagene`` template and detect the feature-file format.
    3. Unless ``interval_variable`` is set, create
       ``<output_prefix>.metagene_counts.csv`` with the header lines.
    4. For every non-comment feature line, pull the matching SAM lines with
       ``samtools view``, count the reads created from them and append the
       feature's metagene to the output file.

    NOTE: when ``interval_variable`` is set the output file is never
    truncated; results are appended to whatever the file already holds.

    Raises:
        MetageneError: if the metagene template or the feature format
            cannot be set up, or if the SAM lines for a feature cannot be
            retrieved.
    """
    arguments = get_arguments()
    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)
    ##TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names, Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class
    # (raw strings: "\d" in a plain literal is an invalid escape sequence)
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment, r"NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment, r"NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding, arguments.padding)
        print("Metagene definition:\t{}".format(metagene))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the metagene template")

    try:
        Feature.set_format(arguments.feature)  # assign file format for the feature file
        print("Reading feature file as {} format".format(Feature.format))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the feature object")

    output_path = "{}.metagene_counts.csv".format(arguments.output_prefix)

    # print out the header line...
    if not arguments.interval_variable:
        with open(output_path, 'w') as output_file:
            output_file.write("# Metagene:\t{}\n".format(metagene))  # define for plotting later
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        for feature_line in read_chunk(feature_file, 1024):
            if feature_line[0] != "#":  # skip comment lines
                # change creation with feature_method
                feature = Feature.create(arguments.feature_count, metagene, feature_line, arguments.count_splicing,
                                         arguments.ignore_strand)

                # pull out sam file lines; it is important to use Feature.get_samtools_region(chromosome_lengths) rather
                # than Feature.get_chromosome_region() because only the first ensures that the interval does not
                # extend beyond the length of the chromosome which makes samtools view return no reads
                (run_pipe_worked, sam_sample) = run_pipe(['samtools view {} {}'.format(
                    arguments.alignment,
                    feature.get_samtools_region())])
                if run_pipe_worked:
                    for samline in sam_sample:
                        if len(samline) > 0:
                            # create Read feature
                            (created_read, read) = Read.create_from_sam(samline,
                                                                        Feature.chromosome_conversion.values(),
                                                                        arguments.count_method,
                                                                        arguments.uniquely_mapping,
                                                                        arguments.ignore_strand,
                                                                        arguments.count_secondary_alignments,
                                                                        arguments.count_failed_quality_control,
                                                                        arguments.count_PCR_optical_duplicate,
                                                                        arguments.count_supplementary_alignment)

                            # count read (if it exists)
                            if created_read:
                                feature.count_read(read, arguments.count_method, arguments.count_splicing,
                                                   arguments.count_partial_reads, arguments.ignore_strand)

                    # output the resulting metagene
                    with open(output_path, 'a') as output_file:
                        output_file.write(
                            "{}\n".format(feature.print_metagene(interval_override=arguments.interval_variable)))

                else:
                    raise MetageneError("Could not pull chromosomal region {} for feature {} from BAM file {}.".format(
                        feature.get_chromosome_region(),
                        feature.name,
                        arguments.alignment))
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import enumerate_patterns
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Input layout: line 1 holds k.
with open('/home/user/Downloads/dataset_240261_11(1).txt',
          mode='r',
          encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k = int(lines[0])

# One read per pattern from enumerate_patterns(k, '01'). The result of
# stitching an Eulerian cycle of their de Bruijn graph is printed.
reads = [Read(pattern) for pattern in enumerate_patterns(k, '01')]
graph = to_debruijn_graph(reads)
start_node = next(graph.get_nodes())
path = walk_eulerian_cycle(graph, start_node)
k_universal_str = path[0].stitch(path)
print(k_universal_str)
def setup():
    """Create fixtures"""

    # Define chromosome sizes
    Read.extract_chromosome_sizes(["@HD\tVN:1.0\tSO:unsorted",
                                   "@SQ\tSN:chr1\tLN:300",
                                   "@SQ\tSN:chr2\tLN:200",
                                   "@PG\tID:test\tVN:0.1"])
    Feature.process_set_chromosome_conversion(["1\tchr1",
                                               "2\tchr2"])

    good_input["bed input counting all of the read"] = ("all",
                                                        "[17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]")
    good_input["bed input counting start of the read"] = ("start",
                                                          "[17, 18, 19, 20, 21, 22, 23]")
    good_input["bed input counting end of the read"] = ("end",
                                                        "[36, 37, 38, 39, 40, 41, 42]")
    good_input["gff input counting all of the read"] = ("all",
                                                        "[43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8]")
    good_input["gff input counting start of the read"] = ("start",
                                                          "[43, 42, 41, 40, 39, 38, 37]")
    good_input["gff input counting end of the read"] = ("end",
                                                        "[14, 13, 12, 11, 10, 9, 8]")


    for method in ['all', 'start', 'end']:
        print "\nTesting feature_count option: ****{}****".format(method)

        if method == 'all':
            metagene = Metagene(10, 4, 2)
            print "\t  with Metagene:\t{}".format(metagene)
            print "\t  with chromosome conversions:\t{}".format(Feature.chromosome_conversion)
        else:
            metagene = Metagene(1, 4, 2)
            print "\t  with Metagene:\t{}".format(metagene)
            print "\t  with chromosome conversions:\t{}".format(Feature.chromosome_conversion)


        # create feature from BED line
        try:
            bedline = "{}\t{}\t{}\t{}\t{}\t{}\n".format(1, 20, 40, "first", 44, "+")
            print "\t  with BED line:\t{}".format(bedline.strip())
            feature1 = Feature.create_from_bed(method, metagene, bedline, False, False)
            if str(feature1.position_array) != correct_features['bed'][method]:
                print "**FAILED**\t  Create Feature from BED line ?"
                print "\t  Desired positions:\t{}".format(correct_features['bed'][method])
                print "\t  Created positions:\t{}".format(feature1.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from BED line ?"
        else:
            print "PASSED\t  Create Feature from BED line ?\t\t{}".format(feature1.get_chromosome_region())

        # create feature from GFF line
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(2, "test", "gene", 10, 39, ".", "-", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline, False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Create Feature from GFF line ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(feature2.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from GFF line ?"
        else:
            print "PASSED\t  Create Feature from GFF line ?\t\t{}".format(feature2.get_chromosome_region())

        # create feature from GFF line with start and end swapped
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(2, "test", "gene", 39, 10, ".", "-", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline, False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Create Feature from GFF line with swapped start and end ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(feature2.position_array)
        except MetageneError as err:
            print "**FAILED**\t  Create Feature from GFF line with swapped start and end ?"
        else:
            print "PASSED\t  Create Feature from GFF line with swapped start and end ?\t\t{}".format(
                feature2.get_chromosome_region())
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(2, "test", "gene", 39, 10, ".", "+", ".", "second")
            print "\t  with GFF line:\t{}".format(gffline.strip())
            feature2 = Feature.create_from_gff(method, metagene, gffline, False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print "**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t**FAIL**"
                print "\t  Desired positions:\t{}".format(correct_features['gff'][method])
                print "\t  Created positions:\t{}".format(feature2.position_array)
        except MetageneError as err:
            print "PASSED\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                err)
        else:
            print "**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                feature2.get_chromosome_region())


        ##TODO finish complete testing of Feature class
    print "\n##TODO finish testing of Feature class creation\n"

    print "\n**** Testing counting and maniputlation ****\n"

    expected = {'all': {}, 'start': {}, 'end': {}}
    #  Positions in metagene:                           17    18     19   20  21-22,23-24,25-26,27-28,29-30,31-32,33-34,35-36,37-38,39-40,  41,   42
    expected['all'] = {
    'all': "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000,0.000,0.286,0.571,0.571,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.100,0.100,0.100,0.100,0.100,0.000,0.000,0.000,0.000,0.000,0.111",
    'start': "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000",
    'end': "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000"}
    #  Positions in metagene:                           17    18    19    20   [21]   22    23
    expected['start'] = {
    'all': "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.050",
    'start': "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
    'end': "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.500"}
    #  Positions in metagene:                           36    37    38    39   [40]   41    42
    expected['end'] = {
    'all': "first,sense:allreads,0.000,0.000,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.111",
    'start': "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
    'end': "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,1.000"}

    metagene = {'all': Metagene(10, 4, 2),
                'start': Metagene(1, 4, 2),
                'end': Metagene(1, 4, 2)}

    for method in ['all', 'start', 'end']:
        if method == 'all':
            print "\t  with Metagene:\t{}".format(metagene[method])
            print "\t  with chromosome conversions:\t{}".format(Feature.chromosome_conversion)
        else:
            print "\t  with Metagene:\t{}".format(metagene[method])
            print "\t  with chromosome conversions:\t{}".format(Feature.chromosome_conversion)

        print "\nTesting feature_count option: ****{}****".format(method)
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(1, 20, 40, "first", 44, "+")
        feature1 = Feature.create_from_bed(method, metagene[method], feature_line, False, False)
        print "\tFeature:\t{}".format(feature1.position_array)

        reads = []
        reads.append(Read("chr1", "+", 3, 1, [10, 11, 12, 13, 14, 15, 16, 17, 18]))
        reads.append(Read("chr1", "-", 1, 2, [23, 24, 25, 26, 27, 28, 29, 30, 31, 32]))
        reads.append(Read("chr1", "+", 4, 2, [30, 31, 32, 33, 34, 40, 41]))
        reads.append(Read("chr1", "-", 1, 1, [42, 43, 44, 45, 46, 47, 48, 49, 50]))

        reads.append(Read("chr1", "+", 10, 1, [51, 52, 53, 54, 55]))
        reads.append(Read("chr2", "+", 10, 1, [18, 19, 20, 21, 22, 23, 24, 25]))

        # starting count
        for count_method in ['all', 'start', 'end']:
            print "\nTesting count_method option: ****{}****".format(count_method)

            output = "{}\n".format(feature1)

            for r in reads:
                output += "{}\n".format(r)
                feature1.count_read(r, count_method, count_partial_reads=True)
                output += "{}\n".format(feature1)

            output += feature1.print_metagene(pretty=True)
            if str(feature1.print_metagene()).strip() == str(expected[method][count_method]).strip():
                print "PASSED\tCreated correct metagene with feature method {} and count method {} ?".format(method,
                                                                                                             count_method)
            else:
                print "**FAILED**\tCreated correct metagene with feature method {} and count method {} ?".format(method,
                                                                                                                 count_method)
                print "\tExpected:\n{}".format(expected[method][count_method])
                print "\tActual  :\n{}".format(feature1.print_metagene())
                print "\tSummary of run:\n{}".format(output)
            feature1 = Feature.create_from_bed(method, metagene[method], feature_line, False,
                                               False)  # zero out counter for next round

    try:
        unstranded_read = Read("chr1", ".", 10, 1, [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        print "PASSED\tCaught unstranded read on stranded count ?\t\t".format(err)
    else:
        print "**FAILED**\tCaught unstranded read on stranded count ?"

    try:
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(1, 20, 40, "first", 44, ".")
        feature1 = Feature.create_from_bed(method, metagene[method], feature_line, False, False)
        unstranded_read = Read("chr1", ".", 10, 1, [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        print "**FAILED**\tAllowed unstranded read on unstranded count ?\t\t".format(err)
    else:
        print "PASSED\tAllowed unstranded read on unstranded count ?"

    print "\n**** Testing adjust_to_metagene ****\n"

    chromosome_converter = {"1": "chr1", "2": "chr2"}

    # ((metagene_tupple),(feature_tupple),expected_result_string, message_string)
    tests = [((8, 2, 2), (16, 8, 24, 4), '8.000,8.000,4.000,4.000,12.000,12.000,2.000,2.000', "Expand to metagene ?"),
             ((4, 2, 2), (6, 8, 6, 2, 4, 4, 2, 4, 24, 8), '17.000,9.000,8.000,34.000', "Contract to metagene ?"),
             ((4, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4), '5.500,9.333,17.825,9.475',
              "Contract with messy floats ?"),
             ((3, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4), '7.611,19.556,14.967',
              "Contract with other messy floats ?")]

    for t in tests:
        metagene = Metagene(*t[0])
        print "\t{}".format(metagene)
        feature_line = "{}\t{}\t{}\n".format(1, 0, len(t[1]))
        feature = Feature.create_from_bed('all', metagene, feature_line, False, False, short=True)
        adjusted_feature = ""
        for f in feature.adjust_to_metagene(t[1]):
            adjusted_feature += "{0:0.3f},".format(f)
        if adjusted_feature[:-1] == t[2]:
            print "PASSED\t{}".format(t[3])
        else:
            print "**FAILED**\t{}".format(t[3])
            print "\tExpected:\t{}".format(t[2])
            print "\tActual  :\t{}".format(adjusted_feature[:-1])
            print "\tOriginal:\t{}".format(feature.adjust_to_metagene(t[1]))

    print "\n**** End of Testing the Feature class ****\n"

# end of Feature.test method
from BalanceNearlyBalancedGraph import balance_graph
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

with open('/home/user/Downloads/dataset_240261_7(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

# First line holds k; every following non-blank line is one k-mer.
lines = data.split('\n')
k = int(lines[0].strip())
kmers = [kmer for kmer in (raw.strip() for raw in lines[1:]) if kmer]

reads = list(map(Read, kmers))
graph = to_debruijn_graph(reads)

graph, roots, tails = balance_graph(graph)

path = walk_eulerian_cycle(graph, roots.pop())
# The last connection in the cycle is artificial -- it was created by the
# balancing step so that generating this path would be fast -- so drop it.
path.pop()
genome = path[0].stitch(path)
print(f'{genome}')
Example #28
0
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import slide_window

with open('/home/user/Downloads/dataset_240257_6(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

# Line 1 is k, line 2 is the DNA string to be cut into k-mers.
lines = data.split('\n')
k = int(lines[0].strip())
dna = lines[1].strip()

# slide_window yields pairs; only the first element of each pair is used.
reads = [Read(kmer) for kmer, _ in slide_window(dna, k)]

graph = to_debruijn_graph(reads)
# Print one "node -> a,b,c" line per node that has at least one output.
for node, other_nodes in graph.get_all_outputs():
    other_nodes = list(other_nodes)
    if other_nodes:
        print(f'{node} -> {",".join(map(str, other_nodes))}')
Example #29
0
    x1, y1, x2, y2 = detection[0][0], detection[0][1], detection[1][
        0], detection[1][1]

    x1, y1 = Equations.pixel_to_real(x1, y1)
    x2, y2 = Equations.pixel_to_real(x2, y2)
    th1, th2, th3 = Equations.get_th(x1, y1, x2, y2, 1.5)
    print "coordinates - ", x1, y1, x2, y2
    print "angles - ", th1, th2, th3

    raw_input("press Enter to continue ")

    if th2 > 130 or th2 < -130 or th3 < -130 or th3 > 130:
        raise ("Mechanical constraint")

    else:
        motion_set = Read.read_from_file("mot/normal.mot", th1 + 6.4, th2, th3)

        for motion in motion_set:
            dxl.set_position(motion[0])
            #raw_input()
            sleep(motion[1])

    motion_set = Read.read_from_file("mot/flap2.mot", th1, th2, th3)

    for motion in motion_set:
        dxl.set_position(motion[0])
        #raw_input('flapping now')
        sleep(motion[1])
    '''signs=[
				[1,1,1],
				[1,1,-1],
Example #30
0
    def create_read(self, dna_seq):
        """Return a Read built from ``dna_seq``.

        The nucleotides are ``apply_errors(dna_seq)``; the qualities come from
        ``create_qualities`` called with this instance's ``read_length``,
        ``avg_quality``, ``quality_min`` and ``quality_max``.
        """
        nucleotides = apply_errors(dna_seq)
        quality_args = (self.read_length, self.avg_quality,
                        self.quality_min, self.quality_max)
        return Read(nucleotides, create_qualities(*quality_args))
 def __init__(self):
     """Create the instance and store a new ``Read()`` object as ``self.dm``."""
     self.dm = Read()
Example #32
0
def setup():
    """Create fixtures.

    Fills the fixture tables ``cigar_string``, ``bad_cigar_string``,
    ``bitwise_flag``, ``bad_bitwise_flag`` and ``good_input`` (all defined
    outside this function) and registers the ``NA`` and ``NH`` SAM tag
    patterns on ``Read`` via ``Read.process_set_sam_tag``.
    """
    # define cigar strings; value: ((args for build_positions), expected_result)
    cigar_string['full_match'] = ((1, "10M", "*"),
                                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    cigar_string['insertion'] = ((1, "5M4I5M", "*"),
                                 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    cigar_string['deletion'] = ((1, "5M4D5M", "*"),
                                [1, 2, 3, 4, 5, 10, 11, 12, 13, 14])
    cigar_string['gapped_match'] = ((1, "5M3N5M", "*"),
                                    [1, 2, 3, 4, 5, 9, 10, 11, 12, 13])
    cigar_string['softclipped_match'] = ((4, "3S5M", "*"), [4, 5, 6, 7, 8])
    cigar_string['hardclipped_match'] = ((4, "3H5M3H", "*"), [4, 5, 6, 7, 8])
    cigar_string['padded_match'] = ((1, "3P5M", "*"), [4, 5, 6, 7, 8])
    cigar_string['mismatch'] = ((1, "5=1X3=", "*"),
                                [1, 2, 3, 4, 5, 6, 7, 8, 9])
    cigar_string['no_cigar_match'] = ((1, "*", "aaaaa"), [1, 2, 3, 4, 5])
    bad_cigar_string['unknown_length'] = ((1, "*", "*"), "raise MetageneError")
    bad_cigar_string['illegal_cigar'] = ((1, "5M4B", "*"),
                                         "raise MetageneError")
    bad_cigar_string['misordered_cigar'] = ((1, "M5N4M5", "*"),
                                            "raise MetageneError")

    # define bitwise flags; value: ((args for parse_sam_bitwise_flag),
    # expected_result(count?, reverse_complemented?))
    bitwise_flag['unmapped'] = ((int("0b000000000100", 2), ), (False, False))
    bitwise_flag['unmapped_withflags'] = ((int("0b100111011101", 2), ),
                                          (False, True))
    bitwise_flag['plus_strand'] = ((int("0b000000000000", 2), ), (True, False))
    bitwise_flag['minus_strand'] = ((int("0b000000010000", 2), ), (True, True))
    bitwise_flag['multiple_segments'] = ((int("0b000000000001", 2), ),
                                         (True, False))
    # try various default and user-changed boolean flags
    bitwise_flag['count_secondary_alignment'] = ((int("0b000100000000", 2), ),
                                                 (True, False))
    bitwise_flag['skip_secondary_alignment'] = ((int("0b000100000000", 2),
                                                 False, False, False, True,
                                                 False, False), (False, False))
    bitwise_flag['skip_failed_quality_control'] = ((int("0b001000000000", 2), ),
                                                   (False, False))
    bitwise_flag['count_failed_quality_control'] = ((int("0b001000000000", 2),
                                                     True, True, False,
                                                     True, False, False),
                                                    (True, False))
    bitwise_flag['skip_PCR_optical_duplicate'] = ((int("0b010000000000", 2), ),
                                                  (False, False))
    bitwise_flag['count_PCR_optical_duplicate'] = ((int("0b010000000000", 2),
                                                    True, False, True,
                                                    True, False, False),
                                                   (True, False))
    bitwise_flag['count_supplementary_alignment'] = ((int("0b100000000000", 2), ),
                                                     (True, False))
    bitwise_flag['skip_supplementary_alignment'] = ((int("0b100000000000", 2),
                                                     True, False, False,
                                                     False, False, False),
                                                    (False, False))
    bitwise_flag['count_only_start_success'] = ((int("0b000001000001", 2),
                                                 True, False, False, True,
                                                 True, False), (True, False))
    bitwise_flag['count_only_start_fail'] = ((int("0b000000000001", 2),
                                              True, False, False, True,
                                              True, False), (False, False))
    bitwise_flag['count_only_end_success'] = ((int("0b000010000001", 2),
                                               True, False, False, True,
                                               False, True), (True, False))
    bitwise_flag['count_only_end_fail'] = ((int("0b000000000001", 2),
                                            True, False, False, True,
                                            False, True), (False, False))
    bad_bitwise_flag['count_only_both'] = ((int("0b000011000001", 2), True,
                                            False, False, True, True, True),
                                           ("Raise MetageneError", ))

    # define good and bad samline inputs
    good_input['no_tags'] = (0, "chr1", 200, "10M", 10, 1, 1, "+")
    good_input['plus_strand_match'] = (0, "chr1", 200, "10M", 10, 2, 4, "+")
    good_input['minus_strand_match'] = (16, "chr1", 200, "10M", 10, 2, 4, "-")
    good_input['no_match'] = (4, "*", 0, "*", 10, 1, 1, ".")

    # ten identical tag lines carrying both an NA and an NH tag
    sample = ["NA:i:4\tNH:i:4"] * 10

    # Raw strings: "\d" in a plain literal is an invalid escape sequence that
    # newer Python versions warn about; the pattern text itself is unchanged.
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex=r'NA:i:(\d+)')
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex=r'NH:i:(\d+)')
 def __init__(self, abs_path = ""):
     """Store ``abs_path`` and build the helper objects used by this instance.

     ``abs_path`` (default: empty string) is kept on the instance and also
     passed to ``Matrix.CreateMatrix``.
     """
     self.abs_path = abs_path
     # helpers are created in this order: matrix, NER extractor, reader
     self.matrix = Matrix.CreateMatrix(abs_path = abs_path)
     self.ner = NerExtraction.NerExtraction()
     self.gmd = Read()
Example #34
0
def check_catch_bad_input(test, samline):
    """Build a Read from ``samline`` and print it.

    ``test`` is the case name supplied by the caller; it is not used here.
    The original body referenced the undefined name ``sam_line``, which
    raised NameError before ``Read`` was ever called.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(Read(samline))
Example #35
0
        "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4",
        "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4",
        "NA:i:4\tNH:i:4", "NA:i:4\tNH:i:4"
    ]

    Read.process_set_sam_tag(sample, count_tag=True, tag_regex='NA:i:(\d+)')
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex='NH:i:(\d+)')


def test_build_positions():
    """Yield one ``(check_build_positions, name, fixture)`` case per
    entry of ``cigar_string``."""
    for name, fixture in cigar_string.items():
        yield check_build_positions, name, fixture


def check_build_positions(test, case):
    """Assert that ``Read.build_positions`` returns the expected positions.

    ``test`` is the case name used in the failure message.  ``case`` is a
    ``(values, expected)`` pair: ``values`` are the positional arguments for
    ``Read.build_positions`` and ``expected`` is the required result.

    The pair is unpacked in the body because tuple parameters in a ``def``
    signature were removed from the language (PEP 3113); callers still pass
    the same two positional arguments.
    """
    values, expected = case
    position_array = Read.build_positions(*values)
    test_description = "\nTest:    \t{}\n".format(test)
    test_description += "Expected:\t{}\n".format(expected)
    test_description += "Position:\t{}\n".format(position_array)
    assert position_array == expected, "{}Error:   \tDid not create the expected position array.".format(
        test_description)


def test_catch_bad_cigar_input():
    """Yield one ``(check_catch_bad_cigar_input, name, fixture)`` case per
    entry of ``bad_cigar_string``."""
    for name, fixture in bad_cigar_string.items():
        yield check_catch_bad_cigar_input, name, fixture


@raises(MetageneError)
def check_catch_bad_cigar_input(test, case):
    """Call ``Read.build_positions`` with arguments expected to be rejected.

    ``case`` is a ``(values, expected)`` pair; only ``values`` is used.  The
    ``raises`` decorator turns a MetageneError into the passing outcome.

    The pair is unpacked in the body because tuple parameters in a ``def``
    signature were removed from the language (PEP 3113).
    """
    values, _expected = case
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(Read.build_positions(*values))
from random import shuffle

from Read import Read
from ToOverlapGraphHash import to_overlap_graph
from Utils import enumerate_patterns

# One Read per pattern produced by enumerate_patterns(4, '01').
segments = list(map(Read, enumerate_patterns(4, '01')))
print(f'{segments}')

graph = to_overlap_graph(segments)

# Print each "segment -> successors" line, visiting the entries in a
# shuffled order.
graph_items_randomized = list(graph.get_all_outputs())
shuffle(graph_items_randomized)
for segment, other_segments in graph_items_randomized:
    other_segments = list(other_segments)
    print(f'{segment} -> {",".join(map(str, other_segments))}')


def walk(path):
    """Recursively extend ``path`` through the module-level ``graph``.

    When ``path`` holds as many entries as ``len(graph)``, the path and its
    stitched form are printed.  Otherwise every output of the last entry that
    is not already on the path is appended, explored, and removed again.
    ``path`` is mutated in place and is left as it was on return.
    """
    if len(path) != len(graph):
        tail = path[-1]
        for successor in graph.get_outputs(tail):
            if successor in path:
                continue
            path.append(successor)
            walk(path)
            path.pop()  # backtrack
        return

    print(f'{path} -> {path[0].stitch(path)}')

from Read import Read
from ToOverlapGraphHash import to_overlap_graph

with open('/home/user/Downloads/dataset_240256_10(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
# Every non-blank line, stripped of surrounding whitespace, is one input string.
dnas = [entry for entry in (raw.strip() for raw in lines) if entry]

reads = list(map(Read, dnas))
overlaps = to_overlap_graph(reads)
# Print one "kmer -> a,b,c" line per k-mer that has at least one output.
for kmer, other_kmers in overlaps.get_all_outputs():
    other_kmers = list(other_kmers)
    if other_kmers:
        print(f'{kmer} -> {",".join(map(str, other_kmers))}')
Example #38
0
def metagene_count():
    """Chain of command for metagene_count analysis.

    Reads the run configuration from ``get_arguments()``, configures the
    ``Read`` and ``Feature`` classes from the alignment file, builds the
    ``Metagene`` template and then, for every non-comment entry of the
    feature file, pulls alignments with ``samtools view``, counts them on the
    feature and appends the feature's metagene to
    ``<output_prefix>.metagene_counts.csv``.

    Raises:
        MetageneError: if the metagene template or the feature format cannot
            be set up, or if ``run_pipe`` reports failure for a feature.
    """
    arguments = get_arguments()
    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)
    ##TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names,
                                      Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment,
                     "NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment,
                     "NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding,
                            arguments.padding)
        print "Metagene definition:\t{}".format(metagene)
    except MetageneError as err:
        # report the underlying error, then raise a higher-level one
        print err
        raise MetageneError("Unable to create the metagene template")

    try:
        Feature.set_format(
            arguments.feature)  # assign file format for the feature file
        print "Reading feature file as {} format".format(Feature.format)
    except MetageneError as err:
        # report the underlying error, then raise a higher-level one
        print err
        raise MetageneError("Unable to create the feature object")

    # print out the header line...
    # (mode 'w' starts a new output file; skipped when interval_variable is set)
    if not arguments.interval_variable:
        with open("{}.metagene_counts.csv".format(arguments.output_prefix),
                  'w') as output_file:
            output_file.write("# Metagene:\t{}\n".format(
                metagene))  # define for plotting later
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        # presumably read_chunk yields one feature line at a time -- TODO confirm
        for feature_line in read_chunk(feature_file, 1024):
            if feature_line[0] != "#":  # skip comment lines
                # change creation with feature_method
                feature = Feature.create(arguments.feature_count, metagene,
                                         feature_line,
                                         arguments.count_splicing,
                                         arguments.ignore_strand)

                # pull out sam file lines; it is important to use feature.get_samtools_region() rather
                # than feature.get_chromosome_region() because only the first ensures that the interval does not
                # extend beyond the length of the chromosome which makes samtools view return no reads
                # NOTE(review): this comment originally showed a chromosome_lengths argument,
                # but the call below passes none -- confirm which is current.
                (run_pipe_worked, sam_sample) = run_pipe([
                    'samtools view {} {}'.format(arguments.alignment,
                                                 feature.get_samtools_region())
                ])
                if run_pipe_worked:
                    for samline in sam_sample:
                        if len(samline) > 0:
                            # create Read feature
                            (created_read, read) = Read.create_from_sam(
                                samline,
                                Feature.chromosome_conversion.values(),
                                arguments.count_method,
                                arguments.uniquely_mapping,
                                arguments.ignore_strand,
                                arguments.count_secondary_alignments,
                                arguments.count_failed_quality_control,
                                arguments.count_PCR_optical_duplicate,
                                arguments.count_supplementary_alignment)

                            # count read (if it exists)
                            if created_read:
                                feature.count_read(
                                    read, arguments.count_method,
                                    arguments.count_splicing,
                                    arguments.count_partial_reads,
                                    arguments.ignore_strand)

                    # output the resulting metagene
                    # (mode 'a' appends one entry per feature to the output file)
                    with open(
                            "{}.metagene_counts.csv".format(
                                arguments.output_prefix), 'a') as output_file:
                        output_file.write("{}\n".format(
                            feature.print_metagene(interval_override=arguments.
                                                   interval_variable)))

                else:
                    # run_pipe reported failure for this feature's region
                    raise MetageneError(
                        "Could not pull chromosomal region {} for feature {} from BAM file {}."
                        .format(feature.get_chromosome_region(), feature.name,
                                arguments.alignment))
Example #39
0
class Paired:
  """
  Pair of reads; acts as a single read once the mate has been dropped (None)
  """
  _readStrand = True # strand of read
  _mateStrand = False # strand of mate
  # type of read
  rtype = enum(NORMAL=0, SINGLE=1, READ_SPLIT=2, MATE_SPLIT=3, FILTERED=4)

  def __init__(self, read, mate, reflengths, refnames, splitparts):
    """
    Wrap both alignments in Read objects and decide the type of the pair
    """
    # whichever alignment comes first (see isFirst) takes the read role
    if mate and Paired.isFirst(mate, read):
      first, second = mate, read
    else:
      first, second = read, mate

    self.__read = Read(first, True, Paired._readStrand, refnames)
    self.__mate = Read(second, False, Paired._mateStrand, refnames)
    self.__qname = read.qname
    self.__reflengths = reflengths
    self.__splitpair = None
    self.__type = self.__classify(splitparts)

    # a split pair survives only if its split read passes the quality and
    # length checks and the pair has a positive actual size
    pair = self.__splitpair

    if pair:
      splitread = pair.splitread

      if not (splitread.hasMinQuality() and splitread.hasMinLengths()) or self.actualSize() <= 0:
        self.__type = Paired.rtype.FILTERED

  def __classify(self, splitparts):
    """
    Work out the pair type; may promote the mate to read, drop the mate and create the split pair
    """
    kinds = Paired.rtype
    read, mate = self.__read, self.__mate

    if not read.hasMinQuality(): # read is below minimal mapping quality
      if mate.isUnmapped() or not mate.hasMinQuality(): # mate is unusable as well
        return kinds.FILTERED

      self.__read, self.__mate = mate, None # mate takes over as single read
      return kinds.SINGLE

    if mate.isUnmapped(): # only read is mapped
      return kinds.SINGLE

    if not mate.hasMinQuality(): # mate is below minimal mapping quality
      self.__mate = None
      return kinds.SINGLE

    if read.isDuplicate():
      if mate.isDuplicate(): # both duplicated -> nothing left
        return kinds.FILTERED

      self.__read, self.__mate = mate, None # mate takes over as single read
      return kinds.SINGLE

    if mate.isDuplicate(): # read stays as single
      self.__mate = None
      return kinds.SINGLE

    if read.isSplit():
      if mate.isInverted() or mate.hasGaps() or self.isInterchromosomal(): # unusable partner
        return kinds.FILTERED

      self.__splitpair = SplitPair(False, mate, SplitRead(read, Paired._readStrand, splitparts))
      return kinds.READ_SPLIT

    if mate.isSplit():
      if read.isInverted() or read.hasGaps() or self.isInterchromosomal(): # unusable partner
        return kinds.FILTERED

      self.__splitpair = SplitPair(True, read, SplitRead(mate, Paired._mateStrand, splitparts))
      return kinds.MATE_SPLIT

    if read.hasGaps() or mate.hasGaps(): # gapped pair that is not split
      return kinds.FILTERED

    return kinds.NORMAL # paired without any gaps

  @staticmethod
  def isFirst(read, mate):
    """
    Tell whether read is placed before (or at the same position as) its mate
    """
    if read.tid == mate.tid: # same reference -> positions decide
      return read.pos <= mate.pos

    return read.tid < mate.tid

  @property
  def qname(self):
    """
    Query name taken from the read passed to the constructor
    """
    return self.__qname

  @property
  def read(self):
    """
    Read of the pair
    """
    return self.__read

  @property
  def mate(self):
    """
    Mate of the pair (None when it was dropped)
    """
    return self.__mate

  @property
  def splitpair(self):
    """
    Split pair (None unless a split read was found)
    """
    return self.__splitpair

  def size(self):
    """
    Insert size derived from template length of read, 0 when it does not apply
    """
    if not self.isNormal():
      return 0

    if self.hasOverlap() or self.isRearranged():
      return 0

    read, mate = self.__read, self.__mate

    if read.isInverted() or mate.isInverted() or self.isInterchromosomal():
      return 0

    # template length without the span covered by read and by mate
    tlen = read.sam.tlen
    readSpan = read.end - read.pos
    mateSpan = mate.end - mate.pos
    return tlen - readSpan - mateSpan

  def actualSize(self):
    """
    Insert size computed from positions when size() gives zero
    """
    size = self.size()

    if size or self.isSingle(): # size from read or read is single
      return size

    read, mate = self.__read, self.__mate

    if self.hasOverlap(): # overlapping pair, possibly rearranged too
      return (read.pos - mate.end) if self.isRearranged() else (mate.pos - read.end)

    # normal or rearranged pair; add reference lengths when on another chromosome
    between = sum(self.__reflengths[read.tid:mate.tid]) if read.tid != mate.tid else 0
    distance = between + mate.pos - read.end - 1

    if self.isRearranged():
      return -(distance + read.len + mate.len)

    return distance

  def hasOverlap(self):
    """
    Tell whether read and mate overlap
    """
    read, mate = self.__read, self.__mate

    # mate starts inside read and ends after it, or mate ends within read
    return read.pos <= mate.pos and \
           (mate.pos <= read.end <= mate.end or mate.end <= read.end)

  def isNormal(self):
    """
    Tell whether pair type is NORMAL
    """
    return self.__type == Paired.rtype.NORMAL

  def isSingle(self):
    """
    Tell whether pair type is SINGLE
    """
    return self.__type == Paired.rtype.SINGLE

  def isReadSplit(self):
    """
    Tell whether pair type is READ_SPLIT
    """
    return self.__type == Paired.rtype.READ_SPLIT

  def isMateSplit(self):
    """
    Tell whether pair type is MATE_SPLIT
    """
    return self.__type == Paired.rtype.MATE_SPLIT

  def isFiltered(self):
    """
    Tell whether pair type is FILTERED
    """
    return self.__type == Paired.rtype.FILTERED

  def isRearranged(self):
    """
    Tell whether both read and mate are inverted
    """
    return self.__read.isInverted() and self.__mate.isInverted()

  def isInterchromosomal(self):
    """
    Tell whether read and mate have different reference ids
    """
    return self.__read.tid != self.__mate.tid
Example #40
0
    else:
        continue_loop = False
        exit(0)

# Main menu: keep prompting until continue_loop is cleared.
while continue_loop:
    # Convert the answer inside a try so that a non-numeric entry is reported
    # as an invalid menu choice instead of aborting with ValueError.
    try:
        menu_input = int(input("[1] Add new ToDo file and add your text"
                               "\n[2] Append to an existing file\n""[3] Read from an existing file\n"))
    except ValueError:
        menu_input = None  # matches none of the options below

    if menu_input == 1:
        to_do = Add_to_dos()
        to_do.add_file()
        return_to_main()
    elif menu_input == 2:
        try:
            append = Append()
            append.append_file()
            return_to_main()
        except OSError:
            # on failure the loop goes straight back to the menu
            print("Error!\nThe file was not found")

    elif menu_input == 3:
        try:
            read = Read()
            read.read_file()
        except OSError:
            print("Error!\nThe file was not found")
        return_to_main()
    else:
        print("Error \nCould not find the menu choice in your input")
        return_to_main()
Example #41
0
def setup():
    """Populate the fixture dictionaries and configure the SAM tags on Read.

    Fills ``cigar_string`` / ``bad_cigar_string``, ``bitwise_flag`` /
    ``bad_bitwise_flag`` and ``good_input`` (all defined outside this
    function) and finally calls ``Read.process_set_sam_tag`` for the ``NA``
    and ``NH`` tags using ten identical sample tag strings.
    """
    # define cigar strings; value: ((args for build_positions), expected_result)
    cigar_string['full_match'] = ((1, "10M", "*"), [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    cigar_string['insertion'] = ((1, "5M4I5M", "*"), [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    cigar_string['deletion'] = ((1, "5M4D5M", "*"), [1, 2, 3, 4, 5, 10, 11, 12, 13, 14])
    cigar_string['gapped_match'] = ((1, "5M3N5M", "*"), [1, 2, 3, 4, 5, 9, 10, 11, 12, 13])
    cigar_string['softclipped_match'] = ((4, "3S5M", "*"), [4, 5, 6, 7, 8])
    cigar_string['hardclipped_match'] = ((4, "3H5M3H", "*"), [4, 5, 6, 7, 8])
    cigar_string['padded_match'] = ((1, "3P5M", "*"), [4, 5, 6, 7, 8])
    cigar_string['mismatch'] = ((1, "5=1X3=", "*"), [1, 2, 3, 4, 5, 6, 7, 8, 9])
    cigar_string['no_cigar_match'] = ((1, "*", "aaaaa"), [1, 2, 3, 4, 5])
    bad_cigar_string['unknown_length'] = ((1, "*", "*"), "raise MetageneError")
    bad_cigar_string['illegal_cigar'] = ((1, "5M4B", "*"), "raise MetageneError")
    bad_cigar_string['misordered_cigar'] = ((1, "M5N4M5", "*"), "raise MetageneError")

    # define bitwise flags; value: ((args for parse_sam_bitwise_flag), expected_result(count?, reverse_complemented?))
    # Flags are written as 12-digit binary literals so individual bits line up.
    bitwise_flag['unmapped'] = ((0b000000000100,), (False, False))
    bitwise_flag['unmapped_withflags'] = ((0b100111011101,), (False, True))
    bitwise_flag['plus_strand'] = ((0b000000000000,), (True, False))
    bitwise_flag['minus_strand'] = ((0b000000010000,), (True, True))
    bitwise_flag['multiple_segments'] = ((0b000000000001,), (True, False))
    # try various default and user-changed boolean flags
    bitwise_flag['count_secondary_alignment'] = ((0b000100000000,), (True, False))
    bitwise_flag['skip_secondary_alignment'] = (
        (0b000100000000, False, False, False, True, False, False), (False, False))
    bitwise_flag['skip_failed_quality_control'] = ((0b001000000000,), (False, False))
    bitwise_flag['count_failed_quality_control'] = (
        (0b001000000000, True, True, False, True, False, False), (True, False))
    bitwise_flag['skip_PCR_optical_duplicate'] = ((0b010000000000,), (False, False))
    bitwise_flag['count_PCR_optical_duplicate'] = (
        (0b010000000000, True, False, True, True, False, False), (True, False))
    bitwise_flag['count_supplementary_alignment'] = ((0b100000000000,), (True, False))
    bitwise_flag['skip_supplementary_alignment'] = (
        (0b100000000000, True, False, False, False, False, False), (False, False))
    bitwise_flag['count_only_start_success'] = (
        (0b000001000001, True, False, False, True, True, False), (True, False))
    bitwise_flag['count_only_start_fail'] = (
        (0b000000000001, True, False, False, True, True, False), (False, False))
    bitwise_flag['count_only_end_success'] = (
        (0b000010000001, True, False, False, True, False, True), (True, False))
    bitwise_flag['count_only_end_fail'] = (
        (0b000000000001, True, False, False, True, False, True), (False, False))
    bad_bitwise_flag['count_only_both'] = (
        (0b000011000001, True, False, False, True, True, True), ("Raise MetageneError",))

    # define good and bad samline inputs
    good_input['no_tags'] = (0, "chr1", 200, "10M", 10, 1, 1, "+")
    good_input['plus_strand_match'] = (0, "chr1", 200, "10M", 10, 2, 4, "+")
    good_input['minus_strand_match'] = (16, "chr1", 200, "10M", 10, 2, 4, "-")
    good_input['no_match'] = (4, "*", 0, "*", 10, 1, 1, ".")

    # ten identical tag strings carrying both an NA and an NH tag
    sample = ["NA:i:4\tNH:i:4"] * 10

    # raw strings keep \d a regex escape instead of an invalid string escape
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex=r'NA:i:(\d+)')
    Read.process_set_sam_tag(sample, count_tag=True, tag_regex=r'NH:i:(\d+)')
Example #42
0
# Read the xz-compressed reads file, keeping only non-blank, stripped lines.
reads_filepath = 'FinalChallengeReads.txt.xz'
with lzma.open(reads_filepath, mode='rt', encoding='utf-8') as f:
    stripped_lines = (raw.strip() for raw in f.read().splitlines())
    lines = [text for text in stripped_lines if text]

# Every line holds two '|'-separated parts that together form one Kdmer.
lines_split = [tuple(line.split('|', maxsplit=2)) for line in lines]
kdmers = [Kdmer(first_part, second_part, 1000) for first_part, second_part in lines_split]
rps = [ReadPair(kdmer) for kdmer in kdmers]

# Shatter each read pair with argument 40 and keep every distinct piece once.
broken_rps = [piece for read_pair in rps for piece in read_pair.shatter(40)]
broken_rps = list(set(broken_rps))

graph = to_debruijn_graph(broken_rps)
contig_paths = find_maximal_non_branching_paths(graph)

# shortest paths first
contig_paths.sort(key=len)

for path in contig_paths:
    if len(path) < path[0].d:
        # path shorter than path[0].d: stitch heads and tails separately
        heads = [Read(p.data.head) for p in path]
        heads_out = heads[0].stitch(heads)
        tails = [Read(p.data.tail) for p in path]
        tails_out = tails[0].stitch(tails)
        print(f'{len(heads)} k-mers = {heads_out}')
        print(f'{len(tails)} k-mers = {tails_out}')
    else:
        # path long enough to be stitched as a whole
        out = path[0].stitch(path)
        print(f'{len(path)} kd-mers = {out}')
Example #43
0
def setup():
    """Exercise the Feature class and print a PASSED / **FAILED** line per check.

    Steps, in order:
      1. register chromosome sizes and chromosome-name conversions;
      2. create features from BED and GFF lines for each feature-count method;
      3. count a fixed set of reads on a BED feature for every combination of
         feature method and count method and compare the printed metagene
         with the expected text;
      4. check handling of unstranded reads on stranded / unstranded features;
      5. check ``adjust_to_metagene`` on several value tuples.

    Results are only printed; nothing is returned.  Every ``print`` takes a
    single parenthesized argument so the statements behave the same under
    Python 2 and Python 3.
    """

    # Define chromosome sizes
    Read.extract_chromosome_sizes([
        "@HD\tVN:1.0\tSO:unsorted", "@SQ\tSN:chr1\tLN:300",
        "@SQ\tSN:chr2\tLN:200", "@PG\tID:test\tVN:0.1"
    ])
    Feature.process_set_chromosome_conversion(["1\tchr1", "2\tchr2"])

    # NOTE(review): these fixtures are stored in good_input under descriptive
    # keys, while the checks below read correct_features['bed'|'gff'][method];
    # confirm correct_features is populated elsewhere.
    good_input["bed input counting all of the read"] = (
        "all",
        "[17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]"
    )
    good_input["bed input counting start of the read"] = (
        "start", "[17, 18, 19, 20, 21, 22, 23]")
    good_input["bed input counting end of the read"] = (
        "end", "[36, 37, 38, 39, 40, 41, 42]")
    good_input["gff input counting all of the read"] = (
        "all",
        "[43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8]"
    )
    good_input["gff input counting start of the read"] = (
        "start", "[43, 42, 41, 40, 39, 38, 37]")
    good_input["gff input counting end of the read"] = (
        "end", "[14, 13, 12, 11, 10, 9, 8]")

    for method in ['all', 'start', 'end']:
        print("\nTesting feature_count option: ****{}****".format(method))

        # only the 'all' method uses the wider Metagene(10, 4, 2)
        metagene = Metagene(10, 4, 2) if method == 'all' else Metagene(1, 4, 2)
        print("\t  with Metagene:\t{}".format(metagene))
        print("\t  with chromosome conversions:\t{}".format(
            Feature.chromosome_conversion))

        # create feature from BED line
        # NOTE(review): in the "create" checks a position mismatch prints
        # **FAILED** but the else branch still prints PASSED afterwards,
        # because no exception was raised.
        try:
            bedline = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
                1, 20, 40, "first", 44, "+")
            print("\t  with BED line:\t{}".format(bedline.strip()))
            feature1 = Feature.create_from_bed(method, metagene, bedline,
                                               False, False)
            if str(feature1.position_array) != correct_features['bed'][method]:
                print("**FAILED**\t  Create Feature from BED line ?")
                print("\t  Desired positions:\t{}".format(
                    correct_features['bed'][method]))
                print("\t  Created positions:\t{}".format(
                    feature1.position_array))
        except MetageneError:
            print("**FAILED**\t  Create Feature from BED line ?")
        else:
            print("PASSED\t  Create Feature from BED line ?\t\t{}".format(
                feature1.get_chromosome_region()))

        # create feature from GFF line
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 10, 39, ".", "-", ".", "second")
            print("\t  with GFF line:\t{}".format(gffline.strip()))
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print("**FAILED**\t  Create Feature from GFF line ?\t**FAIL**")
                print("\t  Desired positions:\t{}".format(
                    correct_features['gff'][method]))
                print("\t  Created positions:\t{}".format(
                    feature2.position_array))
        except MetageneError:
            print("**FAILED**\t  Create Feature from GFF line ?")
        else:
            print("PASSED\t  Create Feature from GFF line ?\t\t{}".format(
                feature2.get_chromosome_region()))

        # create feature from GFF line with start and end swapped
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 39, 10, ".", "-", ".", "second")
            print("\t  with GFF line:\t{}".format(gffline.strip()))
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print("**FAILED**\t  Create Feature from GFF line with swapped start and end ?\t**FAIL**")
                print("\t  Desired positions:\t{}".format(
                    correct_features['gff'][method]))
                print("\t  Created positions:\t{}".format(
                    feature2.position_array))
        except MetageneError:
            print("**FAILED**\t  Create Feature from GFF line with swapped start and end ?")
        else:
            print("PASSED\t  Create Feature from GFF line with swapped start and end ?\t\t{}".format(
                feature2.get_chromosome_region()))

        # swapped start and end on the + strand is expected to raise
        try:
            gffline = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                2, "test", "gene", 39, 10, ".", "+", ".", "second")
            print("\t  with GFF line:\t{}".format(gffline.strip()))
            feature2 = Feature.create_from_gff(method, metagene, gffline,
                                               False, False)
            if str(feature2.position_array) != correct_features['gff'][method]:
                print("**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t**FAIL**")
                print("\t  Desired positions:\t{}".format(
                    correct_features['gff'][method]))
                print("\t  Created positions:\t{}".format(
                    feature2.position_array))
        except MetageneError as err:
            print("PASSED\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                err))
        else:
            print("**FAILED**\t  Do not create Feature from GFF line with swapped start and end, + strand ?\t\t{}".format(
                feature2.get_chromosome_region()))

        ##TODO finish complete testing of Feature class
    print("\n##TODO finish testing of Feature class creation\n")

    print("\n**** Testing counting and maniputlation ****\n")

    # expected[feature method][count method] -> text of print_metagene()
    expected = {'all': {}, 'start': {}, 'end': {}}
    #  Positions in metagene:                           17    18     19   20  21-22,23-24,25-26,27-28,29-30,31-32,33-34,35-36,37-38,39-40,  41,   42
    expected['all'] = {
        'all':
        "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000,0.000,0.286,0.571,0.571,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.100,0.100,0.100,0.100,0.100,0.000,0.000,0.000,0.000,0.000,0.111",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000"
    }
    #  Positions in metagene:                           17    18    19    20   [21]   22    23
    expected['start'] = {
        'all':
        "first,sense:allreads,0.333,0.333,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.050",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,3.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.500"
    }
    #  Positions in metagene:                           36    37    38    39   [40]   41    42
    expected['end'] = {
        'all':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.286,0.286,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.111",
        'start':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,0.000",
        'end':
        "first,sense:allreads,0.000,0.000,0.000,0.000,0.000,2.000,0.000\nfirst,antisense:allreads,0.000,0.000,0.000,0.000,0.000,0.000,1.000"
    }

    metagene = {
        'all': Metagene(10, 4, 2),
        'start': Metagene(1, 4, 2),
        'end': Metagene(1, 4, 2)
    }

    for method in ['all', 'start', 'end']:
        print("\t  with Metagene:\t{}".format(metagene[method]))
        print("\t  with chromosome conversions:\t{}".format(
            Feature.chromosome_conversion))

        print("\nTesting feature_count option: ****{}****".format(method))
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
            1, 20, 40, "first", 44, "+")
        feature1 = Feature.create_from_bed(method, metagene[method],
                                           feature_line, False, False)
        print("\tFeature:\t{}".format(feature1.position_array))

        reads = [
            Read("chr1", "+", 3, 1, [10, 11, 12, 13, 14, 15, 16, 17, 18]),
            Read("chr1", "-", 1, 2, [23, 24, 25, 26, 27, 28, 29, 30, 31, 32]),
            Read("chr1", "+", 4, 2, [30, 31, 32, 33, 34, 40, 41]),
            Read("chr1", "-", 1, 1, [42, 43, 44, 45, 46, 47, 48, 49, 50]),
            Read("chr1", "+", 10, 1, [51, 52, 53, 54, 55]),
            Read("chr2", "+", 10, 1, [18, 19, 20, 21, 22, 23, 24, 25]),
        ]

        # starting count
        for count_method in ['all', 'start', 'end']:
            print("\nTesting count_method option: ****{}****".format(
                count_method))

            # running log of the feature after each read, shown only on failure
            output = "{}\n".format(feature1)

            for r in reads:
                output += "{}\n".format(r)
                feature1.count_read(r, count_method, count_partial_reads=True)
                output += "{}\n".format(feature1)

            output += feature1.print_metagene(pretty=True)
            if str(feature1.print_metagene()).strip() == str(
                    expected[method][count_method]).strip():
                print("PASSED\tCreated correct metagene with feature method {} and count method {} ?".format(
                    method, count_method))
            else:
                print("**FAILED**\tCreated correct metagene with feature method {} and count method {} ?".format(
                    method, count_method))
                print("\tExpected:\n{}".format(expected[method][count_method]))
                print("\tActual  :\n{}".format(feature1.print_metagene()))
                print("\tSummary of run:\n{}".format(output))
            feature1 = Feature.create_from_bed(
                method, metagene[method], feature_line, False,
                False)  # zero out counter for next round

    # From here on, method and feature1 keep the values left by the last
    # iteration of the loop above (method == 'end').
    try:
        unstranded_read = Read("chr1", ".", 10, 1,
                               [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        # the placeholder makes the caught error part of the message
        print("PASSED\tCaught unstranded read on stranded count ?\t\t{}".format(
            err))
    else:
        print("**FAILED**\tCaught unstranded read on stranded count ?")

    try:
        feature_line = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
            1, 20, 40, "first", 44, ".")
        feature1 = Feature.create_from_bed(method, metagene[method],
                                           feature_line, False, False)
        unstranded_read = Read("chr1", ".", 10, 1,
                               [18, 19, 20, 21, 22, 23, 24, 25])
        feature1.count_read(unstranded_read, 'all')
    except MetageneError as err:
        print("**FAILED**\tAllowed unstranded read on unstranded count ?\t\t{}".format(
            err))
    else:
        print("PASSED\tAllowed unstranded read on unstranded count ?")

    print("\n**** Testing adjust_to_metagene ****\n")

    # ((metagene_tupple),(feature_tupple),expected_result_string, message_string)
    tests = [((8, 2, 2), (16, 8, 24, 4),
              '8.000,8.000,4.000,4.000,12.000,12.000,2.000,2.000',
              "Expand to metagene ?"),
             ((4, 2, 2), (6, 8, 6, 2, 4, 4, 2, 4, 24, 8),
              '17.000,9.000,8.000,34.000', "Contract to metagene ?"),
             ((4, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4),
              '5.500,9.333,17.825,9.475', "Contract with messy floats ?"),
             ((3, 2, 2), (2.5, 4, (10.0 / 3), 10, 11, 7.3, 4),
              '7.611,19.556,14.967', "Contract with other messy floats ?")]

    for metagene_args, values, wanted, message in tests:
        metagene = Metagene(*metagene_args)
        print("\t{}".format(metagene))
        # short BED line whose end equals the number of values to adjust
        feature_line = "{}\t{}\t{}\n".format(1, 0, len(values))
        feature = Feature.create_from_bed('all',
                                          metagene,
                                          feature_line,
                                          False,
                                          False,
                                          short=True)
        adjusted_feature = ",".join(
            "{0:0.3f}".format(f) for f in feature.adjust_to_metagene(values))
        if adjusted_feature == wanted:
            print("PASSED\t{}".format(message))
        else:
            print("**FAILED**\t{}".format(message))
            print("\tExpected:\t{}".format(wanted))
            print("\tActual  :\t{}".format(adjusted_feature))
            print("\tOriginal:\t{}".format(feature.adjust_to_metagene(values)))

    print("\n**** End of Testing the Feature class ****\n")


# end of Feature.test method
Example #44
0

def onResult(data):
    """Print ``data`` after the label "PLC Time is" and return it unchanged.

    Returning the value makes it available to whatever consumes this
    function's result after the print.
    """
    print "PLC Time is", data
    return data


# Register onResult as a callback on d (d is created above this excerpt;
# presumably a Twisted Deferred -- confirm).
d.addCallback(onResult)
#plcTime.set(None) # Set PLC to current server time

# Start listening as HTTP server
# Root resource is built from the "www" path; each putChild call attaches one
# child resource under the given name.
root = File("www")
#root.putChild("membrane_insert", Membrane(plc))
# These resources are constructed with the plc object.
root.putChild("events", EventSource(plc))
root.putChild("write", Write(plc))
root.putChild("read", Read(plc))
root.putChild("logger", Logger(plc))
# These resources are constructed without arguments.
root.putChild("membrane", Membrane())
root.putChild("product", Product())
root.putChild("chemical", Chemical())
root.putChild("bag-filter", BagFilter())

# Wrap the resource tree in a Site and listen on TCP port 8000.
factory = Site(root)
reactor.listenTCP(8000, factory)
reactor.run()
# Reached only after reactor.run() returns; exit() ends the script here, so
# the code that follows in the file is not executed.
exit()

# /////////////////////////
# What follows is test code

# Read log value