def testfilterStrand(self):
        """
        computeMatrixOperations filterStrand

        Runs ``filterStrand`` on ``self.matrix`` once with ``--strand +`` and
        once with ``--strand -``. For each run the header returned by
        ``getHeader`` must equal the expected dictionary, and the MD5 digest
        of the bytes read from the file afterwards must equal the recorded
        checksum. The output file is removed after each run, whether or not
        the checks passed.
        """
        dPlus = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": 0, "body": 1000, "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse"], "downstream": 0, "unscaled 3 prime": 0, "group_labels": ["genes"], "bin size": 10, "upstream": 0, "group_boundaries": [0, 107], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800], "max threshold": None, "ref point": None, "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
        dMinus = {u'verbose': True, u'scale': 1, u'skip zeros': False, u'nan after end': False, u'sort using': u'mean', u'unscaled 5 prime': 0, u'body': 1000, u'sample_labels': [u'SRR648667.forward', u'SRR648668.forward', u'SRR648669.forward', u'SRR648670.forward', u'SRR648667.reverse', u'SRR648668.reverse', u'SRR648669.reverse', u'SRR648670.reverse'], u'downstream': 0, u'unscaled 3 prime': 0, u'group_labels': [u'genes'], u'bin size': 10, u'upstream': 0, u'group_boundaries': [0, 89], u'sample_boundaries': [0, 100, 200, 300, 400, 500, 600, 700, 800], u'missing data as zero': False, u'ref point': None, u'min threshold': None, u'sort regions': u'no', u'proc number': 20, u'bin avg type': u'mean', u'max threshold': None}

        # (strand flag, output path, expected header, expected MD5 of the rest)
        cases = (
            ("+", "/tmp/filterStrand1.mat.gz", dPlus, "300f8000be5b5f51e803b57ef08f1c9e"),
            ("-", "/tmp/filterStrand2.mat.gz", dMinus, "0a6ca070a5ba4564f1ab950ac3b7c8f1"),
        )
        for strand, oname, dCorrect, md5sum in cases:
            args = "filterStrand -m {} -o {} --strand {}".format(self.matrix, oname, strand)
            args = args.split()
            try:
                cmo.main(args)
                # The context manager closes the handle even if reading fails.
                with gzip.GzipFile(oname) as f:
                    d = getHeader(f)  # Skip the header, which can be in a different order
                    h = hashlib.md5(f.read()).hexdigest()
                assert d == dCorrect
                assert h == md5sum
            finally:
                # Clean up even when cmo.main or an assertion fails; guard the
                # removal so a missing file does not mask the original error.
                if os.path.exists(oname):
                    os.remove(oname)
Beispiel #2
0
    def testfilterStrand(self):
        """
        computeMatrixOperations filterStrand

        Runs ``filterStrand`` on ``self.matrix`` once with ``--strand +`` and
        once with ``--strand -``. For each run the header returned by
        ``getHeader`` must equal the expected dictionary (per-sample list
        values), and the MD5 digest of the bytes read from the file afterwards
        must equal the recorded checksum. The output file is removed after
        each run, whether or not the checks passed.
        """
        dPlus = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0], "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse"], "downstream": [0, 0, 0, 0, 0, 0, 0, 0], "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0], "group_labels": ["genes"], "bin size": [10, 10, 10, 10, 10, 10, 10, 10], "upstream": [0, 0, 0, 0, 0, 0, 0, 0], "group_boundaries": [0, 107], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800], "max threshold": None, "ref point": [None, None, None, None, None, None, None, None], "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
        dMinus = {u'verbose': True, u'scale': 1, u'skip zeros': False, u'nan after end': False, u'sort using': u'mean', u'unscaled 5 prime': [0, 0, 0, 0, 0, 0, 0, 0], u'body': [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], u'sample_labels': [u'SRR648667.forward', u'SRR648668.forward', u'SRR648669.forward', u'SRR648670.forward', u'SRR648667.reverse', u'SRR648668.reverse', u'SRR648669.reverse', u'SRR648670.reverse'], u'downstream': [0, 0, 0, 0, 0, 0, 0, 0], u'unscaled 3 prime': [0, 0, 0, 0, 0, 0, 0, 0], u'group_labels': [u'genes'], u'bin size': [10, 10, 10, 10, 10, 10, 10, 10], u'upstream': [0, 0, 0, 0, 0, 0, 0, 0], u'group_boundaries': [0, 89], u'sample_boundaries': [0, 100, 200, 300, 400, 500, 600, 700, 800], u'missing data as zero': False, u'ref point': [None, None, None, None, None, None, None, None], u'min threshold': None, u'sort regions': u'no', u'proc number': 20, u'bin avg type': u'mean', u'max threshold': None}

        # (strand flag, output path, expected header, expected MD5 of the rest)
        cases = (
            ("+", "/tmp/filterStrand1.mat.gz", dPlus, "300f8000be5b5f51e803b57ef08f1c9e"),
            ("-", "/tmp/filterStrand2.mat.gz", dMinus, "0a6ca070a5ba4564f1ab950ac3b7c8f1"),
        )
        for strand, oname, dCorrect, md5sum in cases:
            args = "filterStrand -m {} -o {} --strand {}".format(self.matrix, oname, strand)
            args = args.split()
            try:
                cmo.main(args)
                # The context manager closes the handle even if reading fails.
                with gzip.GzipFile(oname) as f:
                    d = getHeader(f)  # Skip the header, which can be in a different order
                    h = hashlib.md5(f.read()).hexdigest()
                assert d == dCorrect
                assert h == md5sum
            finally:
                # Clean up even when cmo.main or an assertion fails; guard the
                # removal so a missing file does not mask the original error.
                if os.path.exists(oname):
                    os.remove(oname)
Beispiel #3
0
 def testsort(self):
     """
     computeMatrixOperations sort

     Runs ``sort`` on ``self.matrix`` with ``self.bed`` passed via ``-R``,
     then checks the header returned by ``getHeader`` against the expected
     dictionary and the MD5 digest of the bytes read afterwards against the
     recorded checksum. The output file is removed whether or not the
     checks passed.
     """
     dCorrect = {
         "verbose": True,
         "scale": 1,
         "skip zeros": False,
         "nan after end": False,
         "sort using": "mean",
         "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0],
         "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000],
         "sample_labels": [
             "SRR648667.forward", "SRR648668.forward", "SRR648669.forward",
             "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse",
             "SRR648669.reverse", "SRR648670.reverse",
         ],
         "downstream": [0, 0, 0, 0, 0, 0, 0, 0],
         "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0],
         "group_labels": ["genes"],
         "bin size": [10, 10, 10, 10, 10, 10, 10, 10],
         "upstream": [0, 0, 0, 0, 0, 0, 0, 0],
         "group_boundaries": [0, 196],
         "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800],
         "max threshold": None,
         "ref point": [None, None, None, None, None, None, None, None],
         "min threshold": None,
         "sort regions": "no",
         "proc number": 20,
         "bin avg type": "mean",
         "missing data as zero": False,
     }
     oname = "/tmp/sorted.mat.gz"
     args = "sort -m {} -o {} -R {}".format(self.matrix, oname, self.bed)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "10ea07d1aa58f44625abe2142ef76094"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
Beispiel #4
0
 def testRelabel(self):
     """
     computeMatrixOperations relabel

     Runs ``relabel`` on ``self.matrix`` with new sample and group labels,
     two of which contain a space, and checks that the header returned by
     ``getHeader`` equals the expected dictionary. Only the header is
     checked here. The output file is removed whether or not the check
     passed.
     """
     dCorrect = {
         "verbose": True,
         "scale": 1,
         "skip zeros": False,
         "nan after end": False,
         "sort using": "mean",
         "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0],
         "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000],
         "sample_labels": ["first", "sec ond", "3rd", "4th", "5th", "6th", "7th", "8th"],
         "downstream": [0, 0, 0, 0, 0, 0, 0, 0],
         "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0],
         "group_labels": ["foo bar"],
         "bin size": [10, 10, 10, 10, 10, 10, 10, 10],
         "upstream": [0, 0, 0, 0, 0, 0, 0, 0],
         "group_boundaries": [0, 196],
         "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800],
         "max threshold": None,
         "ref point": [None, None, None, None, None, None, None, None],
         "min threshold": None,
         "sort regions": "no",
         "proc number": 20,
         "bin avg type": "mean",
         "missing data as zero": False,
     }
     oname = "/tmp/relabeled.mat.gz"
     # Build argv directly instead of splitting a formatted string and then
     # patching entries by index: labels containing spaces stay intact and
     # the result does not depend on the matrix path being a single token.
     args = [
         "relabel", "-m", str(self.matrix), "-o", oname,
         "--sampleLabels", "first", "sec ond", "3rd", "4th", "5th", "6th", "7th", "8th",
         "--groupLabels", "foo bar",
     ]
     try:
         cmo.main(args)
         # The context manager closes the handle even if the read fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)
         assert d == dCorrect
     finally:
         # Clean up even when cmo.main or the assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
 def testcbind(self):
     """
     computeMatrixOperations cbind

     Runs ``cbind`` with ``self.matrix`` given twice as input, then checks
     the header returned by ``getHeader`` against the expected dictionary
     (16 sample labels, i.e. the 8 labels repeated) and the MD5 digest of
     the bytes read afterwards against the recorded checksum. The output
     file is removed whether or not the checks passed.
     """
     dCorrect = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": 0, "body": 1000, "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse", "SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse"], "downstream": 0, "unscaled 3 prime": 0, "group_labels": ["genes"], "bin size": 10, "upstream": 0, "group_boundaries": [0, 196], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600], "max threshold": None, "ref point": None, "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
     # NOTE(review): the file name mentions filterStrand although this is the
     # cbind test; the path is kept unchanged.
     oname = "/tmp/filterStrand.mat.gz"
     args = "cbind -m {0} {0} -o {1}".format(self.matrix, oname)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "e55d89704bb16a11f366663a8fd90a47"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
 def testSubset(self):
     """
     computeMatrixOperations subset

     Runs ``subset`` on ``self.matrix`` selecting the four ``.forward``
     samples, then checks the header returned by ``getHeader`` against the
     expected dictionary and the MD5 digest of the bytes read afterwards
     against the recorded checksum. The output file is removed whether or
     not the checks passed.
     """
     dCorrect = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": 0, "body": 1000, "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward"], "downstream": 0, "unscaled 3 prime": 0, "group_labels": ["genes"], "bin size": 10, "upstream": 0, "group_boundaries": [0, 196], "sample_boundaries": [0, 100, 200, 300, 400], "max threshold": None, "ref point": None, "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
     oname = "/tmp/subset.mat.gz"
     args = "subset -m {} --sample SRR648667.forward SRR648668.forward SRR648669.forward SRR648670.forward -o {}".format(self.matrix, oname)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "edb3c8506c3f27ebb8c7ddf94d5ba594"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
 def testRelabel(self):
     """
     computeMatrixOperations relabel

     Runs ``relabel`` on ``self.matrix`` with new sample and group labels,
     two of which contain a space, and checks that the header returned by
     ``getHeader`` equals the expected dictionary. Only the header is
     checked here. The output file is removed whether or not the check
     passed.
     """
     dCorrect = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0], "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], "sample_labels": ["first", "sec ond", "3rd", "4th", "5th", "6th", "7th", "8th"], "downstream": [0, 0, 0, 0, 0, 0, 0, 0], "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0], "group_labels": ["foo bar"], "bin size": [10, 10, 10, 10, 10, 10, 10, 10], "upstream": [0, 0, 0, 0, 0, 0, 0, 0], "group_boundaries": [0, 196], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800], "max threshold": None, "ref point": [None, None, None, None, None, None, None, None], "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
     oname = "/tmp/relabeled.mat.gz"
     # Build argv directly instead of splitting a formatted string and then
     # patching entries by index: labels containing spaces stay intact and
     # the result does not depend on the matrix path being a single token.
     args = [
         "relabel", "-m", str(self.matrix), "-o", oname,
         "--sampleLabels", "first", "sec ond", "3rd", "4th", "5th", "6th", "7th", "8th",
         "--groupLabels", "foo bar",
     ]
     try:
         cmo.main(args)
         # The context manager closes the handle even if the read fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)
         assert d == dCorrect
     finally:
         # Clean up even when cmo.main or the assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
 def testsort(self):
     """
     computeMatrixOperations sort

     Runs ``sort`` on ``self.matrix`` with ``self.bed`` passed via ``-R``,
     then checks the header returned by ``getHeader`` against the expected
     dictionary and the MD5 digest of the bytes read afterwards against the
     recorded checksum. The output file is removed whether or not the
     checks passed.
     """
     dCorrect = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0], "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse"], "downstream": [0, 0, 0, 0, 0, 0, 0, 0], "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0], "group_labels": ["genes"], "bin size": [10, 10, 10, 10, 10, 10, 10, 10], "upstream": [0, 0, 0, 0, 0, 0, 0, 0], "group_boundaries": [0, 196], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800], "max threshold": None, "ref point": [None, None, None, None, None, None, None, None], "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
     oname = "/tmp/sorted.mat.gz"
     args = "sort -m {} -o {} -R {}".format(self.matrix, oname, self.bed)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "10ea07d1aa58f44625abe2142ef76094"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
Beispiel #9
0
 def testcbind(self):
     """
     computeMatrixOperations cbind

     Runs ``cbind`` with ``self.matrix`` given twice as input, then checks
     the header returned by ``getHeader`` against the expected dictionary
     (16 sample labels, i.e. the 8 labels repeated, with 16-entry
     per-sample lists) and the MD5 digest of the bytes read afterwards
     against the recorded checksum. The output file is removed whether or
     not the checks passed.
     """
     dCorrect = {"verbose": True, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "body": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], "sample_labels": ["SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse", "SRR648667.forward", "SRR648668.forward", "SRR648669.forward", "SRR648670.forward", "SRR648667.reverse", "SRR648668.reverse", "SRR648669.reverse", "SRR648670.reverse"], "downstream": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "unscaled 3 prime": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "group_labels": ["genes"], "bin size": [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], "upstream": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "group_boundaries": [0, 196], "sample_boundaries": [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600], "max threshold": None, "ref point": [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], "min threshold": None, "sort regions": "no", "proc number": 20, "bin avg type": "mean", "missing data as zero": False}
     # NOTE(review): the file name mentions filterStrand although this is the
     # cbind test; the path is kept unchanged.
     oname = "/tmp/filterStrand.mat.gz"
     args = "cbind -m {0} {0} -o {1}".format(self.matrix, oname)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "e55d89704bb16a11f366663a8fd90a47"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
Beispiel #10
0
 def testrbind2(self):
     """
     computeMatrixOperations rbind with different groups

     Runs ``rbind`` on ``self.rbindMatrix1`` and ``self.rbindMatrix2``, then
     checks the header returned by ``getHeader`` against the expected
     dictionary (two group labels, "somegenes" and "othergenes") and the
     MD5 digest of the bytes read afterwards against the recorded checksum.
     The output file is removed whether or not the checks passed.
     """
     dCorrect = {"verbose": False, "scale": 1, "skip zeros": False, "nan after end": False, "sort using": "mean", "unscaled 5 prime": [0], "body": [2], "sample_labels": ["signal"], "downstream": [1], "unscaled 3 prime": [0], "group_labels": ["somegenes", "othergenes"], "bin size": [1], "upstream": [1], "group_boundaries": [0, 3, 7], "sample_boundaries": [0, 4], "max threshold": None, "ref point": [None], "min threshold": None, "sort regions": "keep", "proc number": 1, "bin avg type": "mean", "missing data as zero": True}
     oname = "/tmp/rbind2.mat.gz"
     args = "rbind -m {0} {1} -o {2}".format(self.rbindMatrix1, self.rbindMatrix2, oname)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "5d8b1517fc4c63d000b6b37f70ee163b"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)
 def testSubset(self):
     """
     computeMatrixOperations subset

     Runs ``subset`` on ``self.matrix`` selecting the four ``.forward``
     samples, then checks the header returned by ``getHeader`` against the
     expected dictionary and the MD5 digest of the bytes read afterwards
     against the recorded checksum. The output file is removed whether or
     not the checks passed.
     """
     dCorrect = {
         "verbose": True,
         "scale": 1,
         "skip zeros": False,
         "nan after end": False,
         "sort using": "mean",
         "unscaled 5 prime": 0,
         "body": 1000,
         "sample_labels": [
             "SRR648667.forward", "SRR648668.forward", "SRR648669.forward",
             "SRR648670.forward",
         ],
         "downstream": 0,
         "unscaled 3 prime": 0,
         "group_labels": ["genes"],
         "bin size": 10,
         "upstream": 0,
         "group_boundaries": [0, 196],
         "sample_boundaries": [0, 100, 200, 300, 400],
         "max threshold": None,
         "ref point": None,
         "min threshold": None,
         "sort regions": "no",
         "proc number": 20,
         "bin avg type": "mean",
         "missing data as zero": False,
     }
     oname = "/tmp/subset.mat.gz"
     args = "subset -m {} --sample SRR648667.forward SRR648668.forward SRR648669.forward SRR648670.forward -o {}".format(
         self.matrix, oname)
     args = args.split()
     try:
         cmo.main(args)
         # The context manager closes the handle even if reading fails.
         with gzip.GzipFile(oname) as f:
             d = getHeader(f)  # Skip the header, which can be in a different order
             h = hashlib.md5(f.read()).hexdigest()
         assert d == dCorrect
         assert h == "edb3c8506c3f27ebb8c7ddf94d5ba594"
     finally:
         # Clean up even when cmo.main or an assertion fails; guard the
         # removal so a missing file does not mask the original error.
         if os.path.exists(oname):
             os.remove(oname)