コード例 #1
0
ファイル: format.py プロジェクト: statgen/fivex
 def __call__(self, row: str) -> gencodeContainer:
     """Parse one tab-separated row into a ``gencodeContainer``.

     Column 0 has every "chr" substring removed, columns 3 and 4 are
     converted to ``int``, and column 6 is cut at its first "."
     (presumably a version suffix -- confirm against the input format).
     Every other column is passed through unchanged as a string.

     :param row: A single line of tab-delimited text.
     :return: A ``gencodeContainer`` built from the cleaned columns, in order.
     :raises IndexError: If the row has fewer than seven columns.
     :raises ValueError: If column 3 or 4 is not an integer literal.
     """
     columns: ty.List[ty.Any] = row.split("\t")
     columns[0] = columns[0].replace("chr", "")
     for position in (3, 4):
         columns[position] = int(columns[position])
     # Keep only the text in front of the first dot.
     columns[6] = columns[6].partition(".")[0]
     return gencodeContainer(*columns)
コード例 #2
0
    def test_int_returns_int_subclass(self):
        class BadInt:
            def __int__(self):
                return True

        class BadInt2(builtins.int):
            def __int__(self):
                return True

        # class TruncReturnsBadInt:
        #     def __trunc__(self):
        #         return BadInt()
        #
        # class TruncReturnsIntSubclass:
        #     def __trunc__(self):
        #         return True

        bad_int = BadInt()
        with self.assertWarns(DeprecationWarning):
            n = int(bad_int)
        self.assertEqual(n, 1)

        bad_int = BadInt2()
        with self.assertWarns(DeprecationWarning):
            n = int(bad_int)
        self.assertEqual(n, 1)
コード例 #3
0
ファイル: format.py プロジェクト: statgen/fivex
    def __call__(self, row: str) -> CIContainer:
        """Parse one tab-separated credible-set row into a ``CIContainer``.

        Columns in the raw credible_sets data, in order:

        - phenotype_id: corresponds to genes in gene expression data, and to
          both gene and credible interval in Txrevise data
        - variant_id: in chrom_pos_ref_alt format; not used here
        - chr, pos, ref, alt
        - cs_id: simply {phenotype_id}_{cs_index}
        - cs_index: credible set label, either L1 or L2
        - finemapped_region: range of the region tested, chrom:start-end
        - pip: generated using SuSie
        - z: z-score
        - cs_min_r2, cs_avg_r2
        - cs_size: number of variants contained in this credible set
        - posterior_mean: posterior effect size
        - posterior_sd: posterior standard deviation
        - cs_log10bf: log10 of the Bayes Factor for this credible set

        Extra columns added by joining the main QTL data: ma_samples, maf,
        pvalue, beta, se, type, ac, an, r2, mol_trait_obj_id, gid,
        median_tpm, rsid.

        The numeric indices used below refer to the layout that has the
        study and tissue columns in positions 0 and 1.

        :param row: A single line of tab-delimited text.
        :return: A ``CIContainer`` built from the converted columns, in order.
        :raises IndexError: If an expected column is missing.
        :raises ValueError: If a numeric column cannot be converted.
        """
        fields: ty.List[ty.Any] = row.split("\t")
        if self.study and self.tissue:
            # Tissue-and-study-specific files have two fewer columns (study
            # and tissue), so both are prepended to match the number of
            # fields in the all-tissue file.
            fields = [self.study, self.tissue, *fields]

        # (column index, converter), applied in this exact order.
        core_casts = (
            (5, int),  # pos
            (11, float),  # pip
            (12, float),  # z
            (13, float),  # cs_min_r2
            (14, float),  # cs_avg_r2
            (15, int),  # cs_size
            (16, float),  # posterior_mean
            (17, float),  # posterior_sd
            (18, float),  # cs_log10bf
        )
        for index, convert in core_casts:
            fields[index] = convert(fields[index])

        # Extra fields are only present when the row came from the joined file.
        if len(fields) > 19:
            joined_casts = (
                (19, int),  # ma_samples
                (20, float),  # maf
                (21, float),  # pvalue
                (22, float),  # beta
                (23, float),  # se
                (25, int),  # ac
                (26, int),  # an
                (30, float),  # median_tpm
            )
            for index, convert in joined_casts:
                fields[index] = convert(fields[index])

        return CIContainer(*fields)
コード例 #4
0
 def test_keyword_args(self):
     # Test invoking int() using keyword arguments.
     self.assertEqual(int(x=1.2), 1)
     self.assertEqual(int('100', base=2), 4)
     self.assertEqual(int(x='100', base=2), 4)
     self.assertRaises(TypeError, int, base=10)
     self.assertRaises(TypeError, int, base=0)
コード例 #5
0
 def test_keyword_args(self):
     # Test invoking int() using keyword arguments.
     self.assertEqual(int(x=1.2), 1)
     self.assertEqual(int('100', base=2), 4)
     self.assertEqual(int(x='100', base=2), 4)
     self.assertRaises(TypeError, int, base=10)
     self.assertRaises(TypeError, int, base=0)
コード例 #6
0
    def test_intconversion(self):
        # Test __int__()
        class ClassicMissingMethods:
            pass
        self.assertRaises(TypeError, int, ClassicMissingMethods())

        class MissingMethods(object):
            pass
        self.assertRaises(TypeError, int, MissingMethods())

        class Foo0:
            def __int__(self):
                return 42

        self.assertEqual(int(Foo0()), 42)

        class Classic:
            pass
        for base in (object, Classic):
            class IntOverridesTrunc(base):
                def __int__(self):
                    return 42
                def __trunc__(self):
                    return -12
            self.assertEqual(int(IntOverridesTrunc()), 42)
コード例 #7
0
    def test_int_returns_int_subclass(self):
        class BadInt:
            def __int__(self):
                return True

        class BadInt2(builtins.int):
            def __int__(self):
                return True

        # class TruncReturnsBadInt:
        #     def __trunc__(self):
        #         return BadInt()
        #
        # class TruncReturnsIntSubclass:
        #     def __trunc__(self):
        #         return True

        bad_int = BadInt()
        with self.assertWarns(DeprecationWarning):
            n = int(bad_int)
        self.assertEqual(n, 1)
        self.assertIs(type(n), builtins.int)

        bad_int = BadInt2()
        with self.assertWarns(DeprecationWarning):
            n = int(bad_int)
        self.assertEqual(n, 1)
        self.assertIs(type(n), builtins.int)
コード例 #8
0
    def test_intconversion(self):
        # Test __int__()
        class ClassicMissingMethods:
            pass
        self.assertRaises(TypeError, int, ClassicMissingMethods())

        class MissingMethods(object):
            pass
        self.assertRaises(TypeError, int, MissingMethods())

        class Foo0:
            def __int__(self):
                return 42

        self.assertEqual(int(Foo0()), 42)

        class Classic:
            pass
        for base in (object, Classic):
            class IntOverridesTrunc(base):
                def __int__(self):
                    return 42
                def __trunc__(self):
                    return -12
            self.assertEqual(int(IntOverridesTrunc()), 42)
コード例 #9
0
 def test_int_buffer(self):
     """int() parses only the bytes exposed by a ``buffer`` slice.

     Each source string is wrapped as ``buffer(text, 1, 2)``; the result
     must be 23 whatever follows the selected characters.
     """
     sources = ('123', '123\x00', '123 ', '123A', '1234')
     with test_support.check_py3k_warnings():
         for text in sources:
             self.assertEqual(int(buffer(text, 1, 2)), 23)
コード例 #10
0
 def check(s, base=None):
     """Assert that int() rejects ``s`` with the expected ValueError text.

     Relies on ``self`` (the running test case) from the enclosing scope.

     :param s: The value handed to int().
     :param base: Optional base; when ``None``, int() is called with ``s``
         alone and the message is expected to mention base 10.
     """
     failure_note = "int(%r, %r)" % (s, base)
     with self.assertRaises(ValueError, msg=failure_note) as caught:
         if base is not None:
             int(s, base)
         else:
             int(s)
     reported_base = base if base is not None else 10
     expected_text = ("invalid literal for int() with base %d: %r" %
                      (reported_base, s))
     self.assertEqual(caught.exception.args[0], expected_text)
コード例 #11
0
 def check(s, base=None):
     """Assert that int() rejects ``s`` with the expected ValueError text.

     Relies on ``self`` (the running test case) from the enclosing scope.

     :param s: The value handed to int().
     :param base: Optional base; when ``None``, int() is called with ``s``
         alone and the message is expected to mention base 10.
     """
     failure_note = "int(%r, %r)" % (s, base)
     with self.assertRaises(ValueError, msg=failure_note) as caught:
         if base is not None:
             int(s, base)
         else:
             int(s)
     reported_base = base if base is not None else 10
     expected_text = ("invalid literal for int() with base %d: %r" %
                      (reported_base, s))
     self.assertEqual(caught.exception.args[0], expected_text)
コード例 #12
0
ファイル: make_rsid_lookup.py プロジェクト: abought/zorp
def line_parser(row) -> ty.Tuple[str, int, str, str, int]:
    """Parse one line of the new dbSNP format (builds 152+).

    The line is split on whitespace. The chromosome column holds a RefSeq
    id plus version; the version is dropped and the remainder is looked up
    in ``VERSIONLESS_CHROMS`` to get a human-readable chromosome name.

    :param row: One data line from the file.
    :return: ``(chrom, pos, ref, alt, rsid)``.
    :raises KeyError: If the versionless id is not in ``VERSIONLESS_CHROMS``.
    :raises AttributeError: If ``RSID_CAPTURE`` finds no match in column 7.
    """
    columns = row.split()
    accession = columns[0].partition('.')[0]
    chrom = VERSIONLESS_CHROMS[accession]
    pos = int(columns[1])
    ref, alt = columns[3], columns[4]

    # Take the RSID from the VCF info field, in case the id column is
    # ambiguous for some reason.
    rsid_match = RSID_CAPTURE.search(columns[7])
    rsid = int(rsid_match.group(1))

    return (chrom, pos, ref, alt, rsid)
コード例 #13
0
    def test_int_base_indexable(self):
        class MyIndexable(object):
            def __init__(self, value):
                self.value = value
            def __index__(self):
                return self.value

        # Check out of range bases.
        for base in 2**100, -2**100, 1, 37:
            with self.assertRaises(ValueError):
                int('43', base)

        # Check in-range bases.
        self.assertEqual(int('101', base=MyIndexable(2)), 5)
        self.assertEqual(int('101', base=MyIndexable(10)), 101)
        self.assertEqual(int('101', base=MyIndexable(36)), 1 + 36**2)
コード例 #14
0
    def test_int_base_indexable(self):
        class MyIndexable(object):
            def __init__(self, value):
                self.value = value
            def __index__(self):
                return self.value

        # Check out of range bases.
        for base in 2**100, -2**100, 1, 37:
            with self.assertRaises(ValueError):
                int('43', base)

        # Check in-range bases.
        self.assertEqual(int('101', base=MyIndexable(2)), 5)
        self.assertEqual(int('101', base=MyIndexable(10)), 101)
        self.assertEqual(int('101', base=MyIndexable(36)), 1 + 36**2)
コード例 #15
0
    def test_non_numeric_input_types(self):
        # Test possible non-numeric types for the argument x, including
        # subclasses of the explicitly documented accepted types.
        class CustomStr(str): pass
        class CustomBytes(bytes): pass
        class CustomByteArray(bytearray): pass

        values = [b'100',
                  bytearray(b'100'),
                  CustomStr('100'),
                  CustomBytes(b'100'),
                  CustomByteArray(b'100')]

        for x in values:
            msg = 'x has type %s' % type(x).__name__
            self.assertEqual(int(x), 100, msg=msg)
            self.assertEqual(int(x, 2), 4, msg=msg)
コード例 #16
0
    def _import_reference_thread_tped(self, i):
        """Import the gzipped TPED reference file for chromosome ``i``.

        Reads ``<self._refData>.chr<i>.tped.gz`` line by line and, for every
        row whose second column starts with "rs" and whose genotype vector
        has non-zero standard deviation, stores
        ``[id, rounded MAF, genotype]`` in the snpdb opened at
        ``<self._refData>.chr<i>``, keyed by the integer in column 3.

        :param i: Chromosome identifier used to build both paths.
        :return: Always ``True``.
        """
        prefix = self._refData + '.chr' + str(i)

        with gzip.open(prefix + '.tped.gz', 'rt') as handle:

            db = snpdb.db()
            db.open(prefix)

            for line in handle:

                cols = line.split()  # [chr, rsid, irrelevant, pos, genotype...]

                # Only rows carrying an rs identifier are imported.
                if cols[1][0:2] != 'rs':
                    continue

                # Shift the allele codes down by one, then add up each
                # consecutive pair (presumably the two alleles of one
                # sample -- confirm against the TPED layout).
                dephased = np.array(cols[4:], dtype='b') - 1
                n_pairs = int(len(dephased) / 2)
                genotype = np.sum(dephased.reshape((n_pairs, 2)), axis=1)

                # PLINK uses 1 for minor allele -> convert to minor allele
                # count by swapping 0 and 2, using -1 as a scratch value.
                genotype[genotype == 2] = -1
                genotype[genotype == 0] = 2
                genotype[genotype == -1] = 0

                m = np.mean(genotype)
                s = np.std(genotype)

                # Rows with no variation across samples are skipped.
                if s == 0:
                    continue

                # Compute MAF, folded so that it never exceeds 0.5.
                maf = m / 2.
                maf = 1.0 - maf if maf > 0.5 else maf

                record = [cols[1], round(maf, 3), genotype]

                # Store
                db.insert({int(cols[3]): record})

            db.close()

        return True
コード例 #17
0
    def test_non_numeric_input_types(self):
        # Test possible non-numeric types for the argument x, including
        # subclasses of the explicitly documented accepted types.
        class CustomStr(str): pass
        class CustomBytes(bytes): pass
        class CustomByteArray(bytearray): pass

        factories = [
            bytes,
            bytearray,
            lambda b: CustomStr(b.decode()),
            CustomBytes,
            CustomByteArray,
            memoryview,
        ]
        try:
            from array import array
        except ImportError:
            pass
        else:
            factories.append(lambda b: array('B', b))

        for f in factories:
            x = f(b'100')
            with self.subTest(type(x)):
                self.assertEqual(int(x), 100)
                if isinstance(x, (str, bytes, bytearray)):
                    self.assertEqual(int(x, 2), 4)
                else:
                    msg = "can't convert non-string"
                    with self.assertRaisesRegex(TypeError, msg):
                        int(x, 2)
                with self.assertRaisesRegex(ValueError, 'invalid literal'):
                    int(f(b'A' * 0x10))
コード例 #18
0
    def test_non_numeric_input_types(self):
        # Test possible non-numeric types for the argument x, including
        # subclasses of the explicitly documented accepted types.
        class CustomStr(str): pass
        class CustomBytes(bytes): pass
        class CustomByteArray(bytearray): pass

        factories = [
            bytes,
            bytearray,
            lambda b: CustomStr(b.decode()),
            CustomBytes,
            CustomByteArray,
            memoryview,
        ]
        try:
            from array import array
        except ImportError:
            pass
        else:
            factories.append(lambda b: array('B', b))

        for f in factories:
            x = f(b'100')
            with self.subTest(type(x)):
                self.assertEqual(int(x), 100)
                if isinstance(x, (str, bytes, bytearray)):
                    self.assertEqual(int(x, 2), 4)
                else:
                    msg = "can't convert non-string"
                    with self.assertRaisesRegex(TypeError, msg):
                        int(x, 2)
                with self.assertRaisesRegex(ValueError, 'invalid literal'):
                    int(f(b'A' * 0x10))
コード例 #19
0
    def test_valid_non_numeric_input_types_for_x(self):
        """int() on valid non-numeric ``x`` types (Python 2 semantics).

        Covers str, bytearray, buffer and subclasses, plus unicode types when
        ``have_unicode`` is set. A TypeError during the positive checks is
        re-raised as an AssertionError that names the offending type.
        """
        class CustomStr(str):
            pass

        class CustomByteArray(bytearray):
            pass

        factories = [str, bytearray, CustomStr, CustomByteArray, buffer]

        if have_unicode:
            class CustomUnicode(unicode):
                pass

            factories.extend([unicode, CustomUnicode])

        for make in factories:
            with test_support.check_py3k_warnings(quiet=True):
                x = make('100')
            msg = 'x has value %s and type %s' % (x, type(x).__name__)
            is_text = isinstance(x, basestring)
            try:
                self.assertEqual(int(x), 100, msg=msg)
                if is_text:
                    self.assertEqual(int(x, 2), 4, msg=msg)
            except TypeError as err:
                raise AssertionError('For %s got TypeError: %s' %
                                     (type(x).__name__, err))
            if not is_text:
                # Non-string inputs cannot be combined with an explicit base.
                with self.assertRaisesRegexp(TypeError,
                                             "can't convert non-string",
                                             msg=msg):
                    int(x, 2)
            with self.assertRaisesRegexp(ValueError, 'invalid literal',
                                         msg=msg):
                with test_support.check_py3k_warnings(quiet=True):
                    int(make('A' * 0x10))
コード例 #20
0
    def test_int_subclass_with_int(self):
        class MyInt(builtins.int):
            def __int__(self):
                return 42

        class BadInt(builtins.int):
            def __int__(self):
                return 42.0

        my_int = MyInt(7)
        self.assertEqual(my_int, 7)
        self.assertEqual(int(my_int), 42)

        self.assertRaises(TypeError, int, BadInt())
コード例 #21
0
    def test_int_subclass_with_int(self):
        class MyInt(builtins.int):
            def __int__(self):
                return 42

        class BadInt(builtins.int):
            def __int__(self):
                return 42.0

        my_int = MyInt(7)
        self.assertEqual(my_int, 7)
        self.assertEqual(int(my_int), 42)

        self.assertRaises(TypeError, int, BadInt())
コード例 #22
0
    def test_int_returns_int_subclass(self):
        class BadInt:
            def __int__(self):
                return True

        class BadInt2(builtins.int):
            def __int__(self):
                return True

        # class TruncReturnsBadInt:
        #     def __trunc__(self):
        #         return BadInt()
        #
        # class TruncReturnsIntSubclass:
        #     def __trunc__(self):
        #         return True

        bad_int = BadInt()
        n = int(bad_int)
        self.assertEqual(n, 1)

        bad_int = BadInt2()
        n = int(bad_int)
        self.assertEqual(n, 1)
コード例 #23
0
    def test_non_numeric_input_types(self):
        # Test possible non-numeric types for the argument x, including
        # subclasses of the explicitly documented accepted types.
        class CustomStr(str):
            pass

        class CustomBytes(bytes):
            pass

        class CustomByteArray(bytearray):
            pass

        values = [
            b'100',
            bytearray(b'100'),
            CustomStr('100'),
            CustomBytes(b'100'),
            CustomByteArray(b'100')
        ]

        for x in values:
            msg = 'x has type %s' % type(x).__name__
            self.assertEqual(int(x), 100, msg=msg)
            self.assertEqual(int(x, 2), 4, msg=msg)
コード例 #24
0
def variant_parser(row: str) -> VariantContainer:
    """Parse one tab-separated line into a ``VariantContainer``.

    This function is tied to the file format: it locates the fields and
    turns text into numeric data where appropriate, so it must change if
    the file format changes. It takes no configuration today; it could
    accept some in the future (e.g. different column numbers for files
    holding the same data in a different arrangement).

    Exactly three fields are cleaned up: "chr" is removed from column 0,
    column 1 becomes an ``int`` and column 10 becomes a ``float``.

    :param row: A single line of tab-delimited text.
    :return: A ``VariantContainer`` built from the cleaned columns, in order.
    :raises IndexError: If the row has fewer than eleven columns.
    :raises ValueError: If column 1 or column 10 is not numeric.
    """
    columns = row.split('\t')
    columns[0] = columns[0].replace('chr', '')  # chrom

    # (column index, converter), applied in order.
    numeric_casts = (
        (1, int),  # pos
        (10, float),  # pvalue_nominal
    )
    for index, convert in numeric_casts:
        columns[index] = convert(columns[index])

    return VariantContainer(*columns)
コード例 #25
0
    def test_intconversion(self):
        # Test __int__()
        class ClassicMissingMethods:
            pass
        self.assertRaises(AttributeError, int, ClassicMissingMethods())

        class MissingMethods(object):
            pass
        self.assertRaises(TypeError, int, MissingMethods())

        class Foo0:
            def __int__(self):
                return 42

        class Foo1(object):
            def __int__(self):
                return 42

        class Foo2(__builtin__.int):
            def __int__(self):
                return 42

        class Foo3(__builtin__.int):
            def __int__(self):
                return self

        class Foo4(__builtin__.int):
            def __int__(self):
                return 42L

        class Foo5(__builtin__.int):
            def __int__(self):
                return 42.

        self.assertEqual(int(Foo0()), 42)
        self.assertEqual(int(Foo1()), 42)
        self.assertEqual(int(Foo2()), 42)
        self.assertEqual(int(Foo3()), 0)
        self.assertEqual(int(Foo4()), 42L)
        self.assertRaises(TypeError, int, Foo5())

        class Classic:
            pass
        for base in (object, Classic):
            class IntOverridesTrunc(base):
                def __int__(self):
                    return 42
                def __trunc__(self):
                    return -12
            self.assertEqual(int(IntOverridesTrunc()), 42)
コード例 #26
0
 def test_underscores(self):
     for lit in VALID_UNDERSCORE_LITERALS:
         if any(ch in lit for ch in '.eEjJ'):
             continue
         self.assertEqual(int(lit, 0), eval(lit))
         self.assertEqual(int(lit, 0), int(lit.replace('_', ''), 0))
     for lit in INVALID_UNDERSCORE_LITERALS:
         if any(ch in lit for ch in '.eEjJ'):
             continue
         self.assertRaises(ValueError, int, lit, 0)
     # Additional test cases with bases != 0, only for the constructor:
     self.assertEqual(int("1_00", 3), 9)
     self.assertEqual(int("0_100"), 100)  # not valid as a literal!
     self.assertEqual(int(b"1_00"), 100)  # byte underscore
     self.assertRaises(ValueError, int, "_100")
     self.assertRaises(ValueError, int, "+_100")
     self.assertRaises(ValueError, int, "1__00")
     self.assertRaises(ValueError, int, "100_")
コード例 #27
0
 def test_int_base_limits(self):
     """Testing the supported limits of the int() base parameter."""
     self.assertEqual(int('0', 5), 0)
     with self.assertRaises(ValueError):
         int('0', 1)
     with self.assertRaises(ValueError):
         int('0', 37)
     with self.assertRaises(ValueError):
         int('0', -909)  # An old magic value base from Python 2.
     with self.assertRaises(ValueError):
         int('0', base=0-(2**234))
     with self.assertRaises(ValueError):
         int('0', base=2**234)
     # Bases 2 through 36 are supported.
     for base in range(2,37):
         self.assertEqual(int('0', base=base), 0)
コード例 #28
0
 def test_int_memoryview(self):
     self.assertEqual(int(memoryview(b'123')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123 ')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123A')[1:3]), 23)
     self.assertEqual(int(memoryview(b'1234')[1:3]), 23)
コード例 #29
0
 def test_small_ints(self):
     self.assertIs(int('10'), 10)
     self.assertIs(int('-1'), -1)
     if have_unicode:
         self.assertIs(int(u'10'), 10)
         self.assertIs(int(u'-1'), -1)
コード例 #30
0
 def test_int_base_bad_types(self):
     """Not integer types are not valid bases; issue16772."""
     with self.assertRaises(TypeError):
         int('0', 5.5)
     with self.assertRaises(TypeError):
         int('0', 5.0)
コード例 #31
0
 def test_int_base_bad_types(self):
     """Not integer types are not valid bases; issue16772."""
     with self.assertRaises(TypeError):
         int('0', 5.5)
     with self.assertRaises(TypeError):
         int('0', 5.0)
コード例 #32
0
 def test_int_base_limits(self):
     """Testing the supported limits of the int() base parameter."""
     self.assertEqual(int('0', 5), 0)
     with self.assertRaises(ValueError):
         int('0', 1)
     with self.assertRaises(ValueError):
         int('0', 37)
     with self.assertRaises(ValueError):
         int('0', -909)  # An old magic value base from Python 2.
     with self.assertRaises(ValueError):
         int('0', base=0-(2**234))
     with self.assertRaises(ValueError):
         int('0', base=2**234)
     # Bases 2 through 36 are supported.
     for base in range(2,37):
         self.assertEqual(int('0', base=base), 0)
コード例 #33
0
 def test_no_args(self):
     self.assertEqual(int(), 0)
コード例 #34
0
 def test_no_args(self):
     self.assertEqual(int(), 0)
コード例 #35
0
    def _import_reference_thread_vcf(self, i, keepfile, qualityT, SNPonly):
        """Import the gzipped VCF reference file for chromosome ``i``.

        Reads ``<self._refData>.chr<i>.vcf.gz`` and writes one record per
        non-reference allele of every accepted variant into the snpdb opened
        at ``<self._refData>.chr<i>``. Each record is
        ``[id, MAF, per-sample allele counts, alt allele, ref allele]`` and is
        inserted under the integer position taken from column 1.

        :param i: Chromosome identifier used to build both paths.
        :param keepfile: Path to a tab-separated file whose first column
            lists the sample names to keep, or ``None`` to keep all samples.
        :param qualityT: Quality threshold applied to records whose column 6
            is not 'PASS'; ``None`` disables the quality check.
        :param SNPonly: When true, skip records whose reference allele is
            longer than one character, and skip individual alternate alleles
            longer than one character.
        :return: Always ``True``.
        """
        # Load filter info
        keep = set([])
        if keepfile is not None:
            # NOTE(review): opened without a ``with`` block, so the handle
            # stays open if an exception occurs while reading.
            f = open(keepfile, 'r')
            for line in f:
                # NOTE(review): the line is not stripped; if the file has a
                # single column the stored name keeps its trailing newline --
                # confirm the expected keepfile layout.
                S = line.split("\t")[0]
                keep.add(S)

            f.close()

        # Maps sample column offset (relative to column 9) -> sample name.
        sampleMap = {}

        # Load
        with gzip.open(self._refData + '.chr' + str(i) + '.vcf.gz', 'rt') as f:

            # Find header
            for line in f:
                # Detect infos and headers
                if line[:2] == "##":
                    continue

                # Detect sample names
                if line[:2] == "#C":
                    data = line.split("\t")
                    # Sample names start at column 9.
                    # NOTE(review): the line is not stripped, so the last
                    # name appears to keep its trailing newline.
                    tmp = data[9:]
                    for j in range(0, len(tmp)):
                        if (keepfile is None) or (tmp[j] in keep):
                            sampleMap[j] = tmp[j]

                    break

            sampleKeys = list(sampleMap.keys())

            db = snpdb.db()
            db.open(self._refData + '.chr' + str(i))

            # Main data import loop (continues on the same file iterator,
            # i.e. with the lines that follow the header line)
            for line in f:

                # Data line
                data = line.split("\t")

                # Get GT pos: index of the 'GT' entry within column 8
                tmp = data[8].split(":")
                GT = -1
                for j in range(0, len(tmp)):
                    if tmp[j] == 'GT':
                        GT = j
                        break

                # Checks: skip the record when it has no GT entry, when the
                # id in column 2 does not start with 'rs', or when column 6
                # is not 'PASS' and the quality in column 5 is below the
                # threshold.
                # NOTE(review): int(data[5]) raises ValueError for
                # non-integer text such as "." or "29.5"; it is only
                # evaluated when column 6 is not 'PASS' and qualityT is set.
                if (GT == -1) or (data[2][:2] != 'rs') or (
                        data[6] != 'PASS' and qualityT is not None and
                    (int(data[5]) < qualityT)):
                    continue

                # Read genotype
                genotypes = data[9:]

                # Infer alternate alleles (pos 0: ref allele)
                alleles = [data[3]]
                alleles.extend(data[4].split(","))

                if SNPonly and (len(data[3]) > 1):
                    continue

                # Number of times each allele index is observed.
                counter = np.zeros(len(alleles), dtype='int')

                # Only read samples in sampleMap
                genomap = {}
                for j in range(0, len(sampleKeys)):

                    geno = genotypes[sampleKeys[j]].split(":")[GT]

                    # Ignore half-calls
                    # Characters 0 and 2 are read as the two allele indices,
                    # which assumes single-character indices with one
                    # separator in between -- TODO confirm for multi-digit
                    # allele indices.
                    if geno[0] != "." and geno[2] != ".":
                        counter[int(geno[0])] += 1
                        counter[int(geno[2])] += 1

                    genomap[sampleKeys[j]] = geno

                # Reference allele
                refp = 0

                # Allele indices ordered by ascending observed count.
                SC = np.argsort(counter)  # Sort alleles count
                for p in SC:

                    if p != refp:
                        if SNPonly and len(alleles[p]) > 1:
                            continue

                        # Allele index as text, for comparison with the
                        # genotype characters.
                        minp = str(p)

                        # Per-sample count (0, 1 or 2) of allele p.
                        gd = np.zeros(len(sampleKeys), dtype='B')

                        for j in range(0, len(sampleKeys)):
                            #geno = genotypes[sampleKeys[j]].split(":")[GT]
                            geno = genomap[sampleKeys[j]]

                            # Ignore half-calls
                            if geno[0] != '.' and geno[2] != '.':

                                if geno[0] == minp:
                                    gd[j] += 1

                                if geno[2] == minp:
                                    gd[j] += 1

                        # Compute MAF, folded so that it never exceeds 0.5
                        MAF = np.mean(gd) / 2.
                        if (MAF > 0.5):
                            MAF = 1.0 - MAF

                        T = [data[2], MAF, gd, alleles[p],
                             alleles[refp]]  # Stores alt and ref allele

                        # NOTE(review): every non-reference allele of a
                        # record is inserted under the same position key;
                        # whether later inserts replace or extend earlier
                        # ones depends on snpdb.db.insert -- verify.
                        db.insert({int(data[1]): T})

            # NOTE(review): the enclosing ``with`` block also closes ``f`` on
            # exit.
            f.close()
            db.close()

        return True
コード例 #36
0
ファイル: updateRSID.py プロジェクト: BergmannLab/PascalX
def process_file(CHR, file, snpdb):
    """Replace positional variant IDs in a gzipped VCF-like file with database IDs.

    ``file`` and ``snpdb`` are gzip-compressed, tab-separated text files whose
    first five columns are read as chromosome, position, ID, REF and ALT. The
    database is only ever read forward, so records can only be matched when
    both files are ordered by position.

    The result is written to ``file[:-7] + ".snpid.vcf.gz"``:

    * lines starting with ``#`` are copied unchanged;
    * records whose ID already starts with ``rs`` are copied unchanged and
      counted as matched;
    * any other record is looked up in ``snpdb`` by position. If a database
      record on chromosome ``CHR`` has the same REF and lists the record's ALT
      among its comma-separated ALT alleles, the first ``CHR:...`` token of the
      line is replaced by the database ID and the line is written;
    * records without a match are not written to the output.

    Two summary lines (records read, records matched) are printed at the end.

    Args:
        CHR: Chromosome label exactly as it appears in the first column of
            ``snpdb`` and as the prefix of the positional IDs in ``file``.
        file: Path of the input file; its last seven characters are replaced
            to build the output path.
        snpdb: Path of the gzipped ID database.
    """
    # CHR is matched literally; the raw string avoids the invalid "\w" escape.
    id_pattern = re.compile(re.escape(CHR) + r":(\w|:)+")

    def next_record(handle):
        """Return the next database record on CHR as a field list, else None.

        None is returned at end of file and as soon as a record of another
        chromosome (or an unparsable line) is reached.
        """
        raw = handle.readline()
        if not raw:
            return None
        fields = raw.split('\t', 5)
        if fields[0] != CHR:
            return None
        return fields

    c = 0  # records written to the output
    t = 0  # records read from the input
    with gzip.open(file[:-7] + ".snpid.vcf.gz", 'wt') as h:
        with gzip.open(snpdb, 'rt') as g:
            # Skip header lines and other chromosomes until CHR starts.
            # dbline stays None when the database has no record for CHR.
            dbline = None
            for raw in g:
                if raw[0] != '#' and raw.split('\t')[0] == CHR:
                    dbline = raw.split('\t', 5)
                    break

            # Database records sharing the most recently matched position.
            cache = None
            with gzip.open(file, 'rt') as f:
                for line in f:
                    if line[0] == '#':
                        # Copy header line to new file
                        h.write(line)
                        continue

                    line_split = line.split('\t', 5)
                    t += 1

                    if line_split[2][:2] == 'rs':
                        # Already carries an rs ID: store line as is
                        h.write(line)
                        c += 1
                        continue

                    ln = int(line_split[1])

                    # Advance the database to the first record at or past ln
                    while dbline is not None and int(dbline[1]) < ln:
                        dbline = next_record(g)

                    # Collect all database records at this position
                    if dbline is not None and int(dbline[1]) == ln:
                        cache = [dbline]
                        dbline = next_record(g)
                        while dbline is not None and int(dbline[1]) == ln:
                            cache.append(dbline)
                            dbline = next_record(g)

                    # The cache is kept between lines so that several input
                    # records at the same position can all be matched.
                    if cache is not None and int(cache[0][1]) == ln:
                        ref = line_split[3]
                        alt = line_split[4]

                        for rec in cache:
                            if ref == rec[3] and alt in rec[4].split(","):
                                # Store with the positional ID replaced
                                h.write(id_pattern.sub(rec[2], line, 1))
                                c += 1
                                break

        print("# SNPs in original ref data ( CHR", CHR, "):", t)
        print("# SNPs matched:", c)
コード例 #37
0
ファイル: parsers.py プロジェクト: abought/zorp
    def inner(line):
        """Parse one delimited text line into a ``container`` record.

        The delimiter, the column indices (``_chrom_col``, ``_pos_col``, ...)
        and the parsing flags are taken from the enclosing scope. Columns whose
        index is ``None`` are treated as not configured.

        Args:
            line: One raw line of text.

        Returns:
            ``container(chrom, pos, rsid, ref, alt, log_pval, beta,
            stderr_beta, alt_allele_freq)``.

        Raises:
            exceptions.LineParseException: For any failure while parsing; the
                offending line is attached and the original error is chained.
        """
        try:
            fields = line.strip().split(delimiter)
            if len(fields) == 1:
                raise exceptions.LineParseException(
                    'Unable to split line into separate fields. This line may have a missing or incorrect delimiter.'
                )

            # Fetch values
            ref = None
            alt = None
            if _marker_col is not None:
                chrom, pos, ref, alt = utils.parse_marker(fields[_marker_col])
            else:
                chrom = fields[_chrom_col]
                pos = fields[_pos_col]

            # Normalise chromosome names: drop a "chr" prefix, then uppercase
            if chrom.startswith('chr'):
                chrom = chrom[3:]

            chrom = chrom.upper()

            # Explicit columns will override a value from the marker, by design
            if _ref_col is not None:
                ref = fields[_ref_col]

            if _alt_col is not None:
                alt = fields[_alt_col]

            pval = fields[_pvalue_col]

            # Some optional fields
            rsid = None
            beta = None
            stderr_beta = None
            alt_allele_freq = None
            allele_count = None
            n_samples = None

            if _rsid_col is not None:
                rsid = fields[_rsid_col]
                if rsid in MISSING_VALUES:
                    rsid = None
                elif not rsid.startswith('rs'):
                    rsid = 'rs' + rsid

            if _beta_col is not None:
                beta = fields[_beta_col]

            if _stderr_col is not None:
                stderr_beta = fields[_stderr_col]

            if _allele_freq_col is not None:
                alt_allele_freq = fields[_allele_freq_col]

            if _allele_count_col is not None:
                allele_count = fields[_allele_count_col]
                n_samples = fields[_n_samples_col]

            # Perform type coercion
            log_pval = utils.parse_pval_to_log(pval,
                                               is_neg_log=_is_neg_log_pvalue)

            try:
                pos = int(pos)
            except ValueError:
                # Some programs seem to write long positions using scientific notation, which int cannot handle
                try:
                    pos = int(float(pos))
                except ValueError:
                    # If we still can't parse, it's probably bad data
                    raise exceptions.LineParseException(
                        'Positions should be specified as integers. Could not parse value: {}'
                        .format(pos))

            if beta is not None:
                beta = None if beta in MISSING_VALUES else float(beta)
            if stderr_beta is not None:
                stderr_beta = None if stderr_beta in MISSING_VALUES else float(
                    stderr_beta)

            # Compare against None explicitly: column index 0 is a valid
            # column but is falsy, so a plain truth test would skip it.
            if _allele_freq_col is not None or _allele_count_col is not None:
                alt_allele_freq = utils.parse_allele_frequency(
                    freq=alt_allele_freq,
                    allele_count=allele_count,
                    n_samples=n_samples,
                    is_alt_effect=_is_alt_effect)

            # Some old GWAS files simply won't provide ref or alt information, and the parser will need to do without
            if ref in MISSING_VALUES:
                ref = None

            if isinstance(ref, str):
                ref = ref.upper()

            if alt in MISSING_VALUES:
                alt = None

            if isinstance(alt, str):
                alt = alt.upper()

            result = container(chrom, pos, rsid, ref, alt, log_pval, beta,
                               stderr_beta, alt_allele_freq)
        except Exception as e:
            # Re-wrap every failure so that the caller always receives the
            # offending line; keep the original error as the cause.
            raise exceptions.LineParseException(str(e), line=line) from e
        return result
コード例 #38
0
 def test_int_memoryview(self):
     self.assertEqual(int(memoryview(b'123')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123 ')[1:3]), 23)
     self.assertEqual(int(memoryview(b'123A')[1:3]), 23)
     self.assertEqual(int(memoryview(b'1234')[1:3]), 23)
コード例 #39
0
    def test_basic(self):
        # Exercises int() on numbers and on strings in every base from 2 to 36,
        # including prefixes, surrounding whitespace and malformed literals.
        self.assertEqual(int(314), 314)
        self.assertEqual(int(3.14), 3)
        # Check that conversion from float truncates towards zero
        self.assertEqual(int(-3.14), -3)
        self.assertEqual(int(3.9), 3)
        self.assertEqual(int(-3.9), -3)
        self.assertEqual(int(3.5), 3)
        self.assertEqual(int(-3.5), -3)
        # Strings: a sign and surrounding whitespace (including the Unicode
        # EM SPACE / EN SPACE characters) are accepted
        self.assertEqual(int("-3"), -3)
        self.assertEqual(int(" -3 "), -3)
        self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
        # Different base:
        self.assertEqual(int("10",16), 16)
        # Test conversion from strings and various anomalies
        # L is defined outside this method; it is iterated as (string, value)
        # pairs in which the value may be the ValueError class itself.
        for s, v in L:
            for sign in "", "+", "-":
                for prefix in "", " ", "\t", "  \t\t  ":
                    ss = prefix + sign + s
                    vv = v
                    # A leading minus negates the expected value, unless the
                    # entry is marked with ValueError
                    if sign == "-" and v is not ValueError:
                        vv = -v
                    try:
                        self.assertEqual(int(ss), vv)
                    except ValueError:
                        # A ValueError raised while converting ss is ignored
                        pass

        # Smallest value representable with sys.maxsize: -sys.maxsize - 1
        s = repr(-1-sys.maxsize)
        x = int(s)
        self.assertEqual(x+1, -sys.maxsize)
        self.assertIsInstance(x, builtins.int)
        # should return int
        # s[1:] drops the minus sign, giving sys.maxsize + 1
        self.assertEqual(int(s[1:]), sys.maxsize+1)

        # should return int
        x = int(1e100)
        self.assertIsInstance(x, builtins.int)
        x = int(-1e100)
        self.assertIsInstance(x, builtins.int)


        # SF bug 434186:  0x80000000/2 != 0x80000000>>1.
        # Worked by accident in Windows release build, but failed in debug build.
        # Failed in all Linux builds.
        x = -1-sys.maxsize
        self.assertEqual(x >> 1, x//2)

        # A 600-digit decimal string still converts to an int
        x = int('1' * 600)
        self.assertIsInstance(x, builtins.int)


        # A base may not be combined with a non-string first argument
        self.assertRaises(TypeError, int, 1, 12)

        self.assertEqual(int('0o123', 0), 83)
        self.assertEqual(int('0x123', 16), 291)

        # Bug 1679: "0x" is not a valid hex literal
        self.assertRaises(ValueError, int, "0x", 16)
        self.assertRaises(ValueError, int, "0x", 0)

        self.assertRaises(ValueError, int, "0o", 8)
        self.assertRaises(ValueError, int, "0o", 0)

        self.assertRaises(ValueError, int, "0b", 2)
        self.assertRaises(ValueError, int, "0b", 0)

        # SF bug 1334662: int(string, base) wrong answers
        # Various representations of 2**32 evaluated to 0
        # rather than 2**32 in previous versions

        self.assertEqual(int('100000000000000000000000000000000', 2), 4294967296)
        self.assertEqual(int('102002022201221111211', 3), 4294967296)
        self.assertEqual(int('10000000000000000', 4), 4294967296)
        self.assertEqual(int('32244002423141', 5), 4294967296)
        self.assertEqual(int('1550104015504', 6), 4294967296)
        self.assertEqual(int('211301422354', 7), 4294967296)
        self.assertEqual(int('40000000000', 8), 4294967296)
        self.assertEqual(int('12068657454', 9), 4294967296)
        self.assertEqual(int('4294967296', 10), 4294967296)
        self.assertEqual(int('1904440554', 11), 4294967296)
        self.assertEqual(int('9ba461594', 12), 4294967296)
        self.assertEqual(int('535a79889', 13), 4294967296)
        self.assertEqual(int('2ca5b7464', 14), 4294967296)
        self.assertEqual(int('1a20dcd81', 15), 4294967296)
        self.assertEqual(int('100000000', 16), 4294967296)
        self.assertEqual(int('a7ffda91', 17), 4294967296)
        self.assertEqual(int('704he7g4', 18), 4294967296)
        self.assertEqual(int('4f5aff66', 19), 4294967296)
        self.assertEqual(int('3723ai4g', 20), 4294967296)
        self.assertEqual(int('281d55i4', 21), 4294967296)
        self.assertEqual(int('1fj8b184', 22), 4294967296)
        self.assertEqual(int('1606k7ic', 23), 4294967296)
        self.assertEqual(int('mb994ag', 24), 4294967296)
        self.assertEqual(int('hek2mgl', 25), 4294967296)
        self.assertEqual(int('dnchbnm', 26), 4294967296)
        self.assertEqual(int('b28jpdm', 27), 4294967296)
        self.assertEqual(int('8pfgih4', 28), 4294967296)
        self.assertEqual(int('76beigg', 29), 4294967296)
        self.assertEqual(int('5qmcpqg', 30), 4294967296)
        self.assertEqual(int('4q0jto4', 31), 4294967296)
        self.assertEqual(int('4000000', 32), 4294967296)
        self.assertEqual(int('3aokq94', 33), 4294967296)
        self.assertEqual(int('2qhxjli', 34), 4294967296)
        self.assertEqual(int('2br45qb', 35), 4294967296)
        self.assertEqual(int('1z141z4', 36), 4294967296)

        # tests with base 0
        # NOTE(review): the previous remark here about "old octal syntax"
        # does not match the '0o'-prefixed literals below, and the first two
        # assertions are identical - confirm whether one was meant to differ.
        self.assertEqual(int(' 0o123  ', 0), 83)
        self.assertEqual(int(' 0o123  ', 0), 83)
        self.assertEqual(int('000', 0), 0)
        self.assertEqual(int('0o123', 0), 83)
        self.assertEqual(int('0x123', 0), 291)
        self.assertEqual(int('0b100', 0), 4)
        self.assertEqual(int(' 0O123   ', 0), 83)
        self.assertEqual(int(' 0X123  ', 0), 291)
        self.assertEqual(int(' 0B100 ', 0), 4)

        # without base still base 10
        self.assertEqual(int('0123'), 123)
        self.assertEqual(int('0123', 10), 123)

        # tests with prefix and base != 0
        self.assertEqual(int('0x123', 16), 291)
        self.assertEqual(int('0o123', 8), 83)
        self.assertEqual(int('0b100', 2), 4)
        self.assertEqual(int('0X123', 16), 291)
        self.assertEqual(int('0O123', 8), 83)
        self.assertEqual(int('0B100', 2), 4)

        # the code has special checks for the first character after the
        #  type prefix
        self.assertRaises(ValueError, int, '0b2', 2)
        self.assertRaises(ValueError, int, '0b02', 2)
        self.assertRaises(ValueError, int, '0B2', 2)
        self.assertRaises(ValueError, int, '0B02', 2)
        self.assertRaises(ValueError, int, '0o8', 8)
        self.assertRaises(ValueError, int, '0o08', 8)
        self.assertRaises(ValueError, int, '0O8', 8)
        self.assertRaises(ValueError, int, '0O08', 8)
        self.assertRaises(ValueError, int, '0xg', 16)
        self.assertRaises(ValueError, int, '0x0g', 16)
        self.assertRaises(ValueError, int, '0Xg', 16)
        self.assertRaises(ValueError, int, '0X0g', 16)

        # SF bug 1334662: int(string, base) wrong answers
        # Checks for proper evaluation of 2**32 + 1
        self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297)
        self.assertEqual(int('102002022201221111212', 3), 4294967297)
        self.assertEqual(int('10000000000000001', 4), 4294967297)
        self.assertEqual(int('32244002423142', 5), 4294967297)
        self.assertEqual(int('1550104015505', 6), 4294967297)
        self.assertEqual(int('211301422355', 7), 4294967297)
        self.assertEqual(int('40000000001', 8), 4294967297)
        self.assertEqual(int('12068657455', 9), 4294967297)
        self.assertEqual(int('4294967297', 10), 4294967297)
        self.assertEqual(int('1904440555', 11), 4294967297)
        self.assertEqual(int('9ba461595', 12), 4294967297)
        self.assertEqual(int('535a7988a', 13), 4294967297)
        self.assertEqual(int('2ca5b7465', 14), 4294967297)
        self.assertEqual(int('1a20dcd82', 15), 4294967297)
        self.assertEqual(int('100000001', 16), 4294967297)
        self.assertEqual(int('a7ffda92', 17), 4294967297)
        self.assertEqual(int('704he7g5', 18), 4294967297)
        self.assertEqual(int('4f5aff67', 19), 4294967297)
        self.assertEqual(int('3723ai4h', 20), 4294967297)
        self.assertEqual(int('281d55i5', 21), 4294967297)
        self.assertEqual(int('1fj8b185', 22), 4294967297)
        self.assertEqual(int('1606k7id', 23), 4294967297)
        self.assertEqual(int('mb994ah', 24), 4294967297)
        self.assertEqual(int('hek2mgm', 25), 4294967297)
        self.assertEqual(int('dnchbnn', 26), 4294967297)
        self.assertEqual(int('b28jpdn', 27), 4294967297)
        self.assertEqual(int('8pfgih5', 28), 4294967297)
        self.assertEqual(int('76beigh', 29), 4294967297)
        self.assertEqual(int('5qmcpqh', 30), 4294967297)
        self.assertEqual(int('4q0jto5', 31), 4294967297)
        self.assertEqual(int('4000001', 32), 4294967297)
        self.assertEqual(int('3aokq95', 33), 4294967297)
        self.assertEqual(int('2qhxjlj', 34), 4294967297)
        self.assertEqual(int('2br45qc', 35), 4294967297)
        self.assertEqual(int('1z141z5', 36), 4294967297)
コード例 #40
0
 def test_small_ints(self):
     # Bug #3236: Return small longs from PyLong_FromString
     self.assertIs(int('10'), 10)
     self.assertIs(int('-1'), -1)
     self.assertIs(int(b'10'), 10)
     self.assertIs(int(b'-1'), -1)
コード例 #41
0
    def test_basic(self):
        # Exercises int() on numbers and on strings in every base from 2 to 36.
        # This variant is written for Python 2: it relies on long literals
        # with an "L" suffix, the long and unicode types and sys.maxint.
        self.assertEqual(int(314), 314)
        self.assertEqual(int(3.14), 3)
        self.assertEqual(int(314L), 314)
        # Check that conversion from float truncates towards zero
        self.assertEqual(int(-3.14), -3)
        self.assertEqual(int(3.9), 3)
        self.assertEqual(int(-3.9), -3)
        self.assertEqual(int(3.5), 3)
        self.assertEqual(int(-3.5), -3)
        # Different base:
        self.assertEqual(int("10",16), 16L)
        # have_unicode is defined outside this method
        if have_unicode:
            self.assertEqual(int(unicode("10"),16), 16L)
        # Test conversion from strings and various anomalies
        # L is defined outside this method; it is iterated as (string, value)
        # pairs in which the value may be the ValueError class itself.
        for s, v in L:
            for sign in "", "+", "-":
                for prefix in "", " ", "\t", "  \t\t  ":
                    ss = prefix + sign + s
                    vv = v
                    # A leading minus negates the expected value, unless the
                    # entry is marked with ValueError
                    if sign == "-" and v is not ValueError:
                        vv = -v
                    try:
                        self.assertEqual(int(ss), vv)
                    except v:
                        # For entries whose expected value is the ValueError
                        # class, the ValueError raised by int(ss) ends up here
                        pass

        # Smallest plain int: -sys.maxint - 1
        s = repr(-1-sys.maxint)
        x = int(s)
        self.assertEqual(x+1, -sys.maxint)
        self.assertIsInstance(x, __builtin__.int)
        # should return long
        # s[1:] drops the minus sign, giving sys.maxint + 1
        self.assertEqual(int(s[1:]), sys.maxint+1)

        # should return long
        x = int(1e100)
        self.assertIsInstance(x, long)
        x = int(-1e100)
        self.assertIsInstance(x, long)


        # SF bug 434186:  0x80000000/2 != 0x80000000>>1.
        # Worked by accident in Windows release build, but failed in debug build.
        # Failed in all Linux builds.
        x = -1-sys.maxint
        self.assertEqual(x >> 1, x//2)

        # An embedded NUL and an out-of-range base (40) are both rejected
        self.assertRaises(ValueError, int, '123\0')
        self.assertRaises(ValueError, int, '53', 40)

        # SF bug 1545497: embedded NULs were not detected with
        # explicit base
        self.assertRaises(ValueError, int, '123\0', 10)
        self.assertRaises(ValueError, int, '123\x00 245', 20)

        # A 600-digit decimal string converts to a long
        x = int('1' * 600)
        self.assertIsInstance(x, long)

        if have_unicode:
            # Same with 600 copies of the non-ASCII character U+0661
            x = int(unichr(0x661) * 600)
            self.assertIsInstance(x, long)

        # A base may not be combined with a non-string first argument
        self.assertRaises(TypeError, int, 1, 12)

        # With base 0 a bare leading zero is read as octal here
        self.assertEqual(int('0123', 0), 83)
        self.assertEqual(int('0x123', 16), 291)

        # Bug 1679: "0x" is not a valid hex literal
        self.assertRaises(ValueError, int, "0x", 16)
        self.assertRaises(ValueError, int, "0x", 0)

        self.assertRaises(ValueError, int, "0o", 8)
        self.assertRaises(ValueError, int, "0o", 0)

        self.assertRaises(ValueError, int, "0b", 2)
        self.assertRaises(ValueError, int, "0b", 0)


        # SF bug 1334662: int(string, base) wrong answers
        # Various representations of 2**32 evaluated to 0
        # rather than 2**32 in previous versions

        self.assertEqual(int('100000000000000000000000000000000', 2), 4294967296L)
        self.assertEqual(int('102002022201221111211', 3), 4294967296L)
        self.assertEqual(int('10000000000000000', 4), 4294967296L)
        self.assertEqual(int('32244002423141', 5), 4294967296L)
        self.assertEqual(int('1550104015504', 6), 4294967296L)
        self.assertEqual(int('211301422354', 7), 4294967296L)
        self.assertEqual(int('40000000000', 8), 4294967296L)
        self.assertEqual(int('12068657454', 9), 4294967296L)
        self.assertEqual(int('4294967296', 10), 4294967296L)
        self.assertEqual(int('1904440554', 11), 4294967296L)
        self.assertEqual(int('9ba461594', 12), 4294967296L)
        self.assertEqual(int('535a79889', 13), 4294967296L)
        self.assertEqual(int('2ca5b7464', 14), 4294967296L)
        self.assertEqual(int('1a20dcd81', 15), 4294967296L)
        self.assertEqual(int('100000000', 16), 4294967296L)
        self.assertEqual(int('a7ffda91', 17), 4294967296L)
        self.assertEqual(int('704he7g4', 18), 4294967296L)
        self.assertEqual(int('4f5aff66', 19), 4294967296L)
        self.assertEqual(int('3723ai4g', 20), 4294967296L)
        self.assertEqual(int('281d55i4', 21), 4294967296L)
        self.assertEqual(int('1fj8b184', 22), 4294967296L)
        self.assertEqual(int('1606k7ic', 23), 4294967296L)
        self.assertEqual(int('mb994ag', 24), 4294967296L)
        self.assertEqual(int('hek2mgl', 25), 4294967296L)
        self.assertEqual(int('dnchbnm', 26), 4294967296L)
        self.assertEqual(int('b28jpdm', 27), 4294967296L)
        self.assertEqual(int('8pfgih4', 28), 4294967296L)
        self.assertEqual(int('76beigg', 29), 4294967296L)
        self.assertEqual(int('5qmcpqg', 30), 4294967296L)
        self.assertEqual(int('4q0jto4', 31), 4294967296L)
        self.assertEqual(int('4000000', 32), 4294967296L)
        self.assertEqual(int('3aokq94', 33), 4294967296L)
        self.assertEqual(int('2qhxjli', 34), 4294967296L)
        self.assertEqual(int('2br45qb', 35), 4294967296L)
        self.assertEqual(int('1z141z4', 36), 4294967296L)

        # tests with base 0
        # this fails on 3.0, but in 2.x the old octal syntax is allowed
        # NOTE(review): the next two assertions are identical - confirm
        # whether one of them was meant to use a different literal.
        self.assertEqual(int(' 0123  ', 0), 83)
        self.assertEqual(int(' 0123  ', 0), 83)
        self.assertEqual(int('000', 0), 0)
        self.assertEqual(int('0o123', 0), 83)
        self.assertEqual(int('0x123', 0), 291)
        self.assertEqual(int('0b100', 0), 4)
        self.assertEqual(int(' 0O123   ', 0), 83)
        self.assertEqual(int(' 0X123  ', 0), 291)
        self.assertEqual(int(' 0B100 ', 0), 4)
        self.assertEqual(int('0', 0), 0)
        self.assertEqual(int('+0', 0), 0)
        self.assertEqual(int('-0', 0), 0)
        self.assertEqual(int('00', 0), 0)
        # A leading zero selects octal, in which the digits 8 and 9 are invalid
        self.assertRaises(ValueError, int, '08', 0)
        self.assertRaises(ValueError, int, '-012395', 0)

        # without base still base 10
        self.assertEqual(int('0123'), 123)
        self.assertEqual(int('0123', 10), 123)

        # tests with prefix and base != 0
        self.assertEqual(int('0x123', 16), 291)
        self.assertEqual(int('0o123', 8), 83)
        self.assertEqual(int('0b100', 2), 4)
        self.assertEqual(int('0X123', 16), 291)
        self.assertEqual(int('0O123', 8), 83)
        self.assertEqual(int('0B100', 2), 4)

        # the code has special checks for the first character after the
        #  type prefix
        self.assertRaises(ValueError, int, '0b2', 2)
        self.assertRaises(ValueError, int, '0b02', 2)
        self.assertRaises(ValueError, int, '0B2', 2)
        self.assertRaises(ValueError, int, '0B02', 2)
        self.assertRaises(ValueError, int, '0o8', 8)
        self.assertRaises(ValueError, int, '0o08', 8)
        self.assertRaises(ValueError, int, '0O8', 8)
        self.assertRaises(ValueError, int, '0O08', 8)
        self.assertRaises(ValueError, int, '0xg', 16)
        self.assertRaises(ValueError, int, '0x0g', 16)
        self.assertRaises(ValueError, int, '0Xg', 16)
        self.assertRaises(ValueError, int, '0X0g', 16)

        # SF bug 1334662: int(string, base) wrong answers
        # Checks for proper evaluation of 2**32 + 1
        self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297L)
        self.assertEqual(int('102002022201221111212', 3), 4294967297L)
        self.assertEqual(int('10000000000000001', 4), 4294967297L)
        self.assertEqual(int('32244002423142', 5), 4294967297L)
        self.assertEqual(int('1550104015505', 6), 4294967297L)
        self.assertEqual(int('211301422355', 7), 4294967297L)
        self.assertEqual(int('40000000001', 8), 4294967297L)
        self.assertEqual(int('12068657455', 9), 4294967297L)
        self.assertEqual(int('4294967297', 10), 4294967297L)
        self.assertEqual(int('1904440555', 11), 4294967297L)
        self.assertEqual(int('9ba461595', 12), 4294967297L)
        self.assertEqual(int('535a7988a', 13), 4294967297L)
        self.assertEqual(int('2ca5b7465', 14), 4294967297L)
        self.assertEqual(int('1a20dcd82', 15), 4294967297L)
        self.assertEqual(int('100000001', 16), 4294967297L)
        self.assertEqual(int('a7ffda92', 17), 4294967297L)
        self.assertEqual(int('704he7g5', 18), 4294967297L)
        self.assertEqual(int('4f5aff67', 19), 4294967297L)
        self.assertEqual(int('3723ai4h', 20), 4294967297L)
        self.assertEqual(int('281d55i5', 21), 4294967297L)
        self.assertEqual(int('1fj8b185', 22), 4294967297L)
        self.assertEqual(int('1606k7id', 23), 4294967297L)
        self.assertEqual(int('mb994ah', 24), 4294967297L)
        self.assertEqual(int('hek2mgm', 25), 4294967297L)
        self.assertEqual(int('dnchbnn', 26), 4294967297L)
        self.assertEqual(int('b28jpdn', 27), 4294967297L)
        self.assertEqual(int('8pfgih5', 28), 4294967297L)
        self.assertEqual(int('76beigh', 29), 4294967297L)
        self.assertEqual(int('5qmcpqh', 30), 4294967297L)
        self.assertEqual(int('4q0jto5', 31), 4294967297L)
        self.assertEqual(int('4000001', 32), 4294967297L)
        self.assertEqual(int('3aokq95', 33), 4294967297L)
        self.assertEqual(int('2qhxjlj', 34), 4294967297L)
        self.assertEqual(int('2br45qc', 35), 4294967297L)
        self.assertEqual(int('1z141z5', 36), 4294967297L)
コード例 #42
0
ファイル: format.py プロジェクト: statgen/fivex
    def __call__(self, row: str) -> VariantContainer:
        """
        Parse one tab-separated line of association data into a VariantContainer.

        The parser is the piece tied to the file format, so this must change if
        the file format changes! It locates the fields, converts the numeric
        columns, and appends six derived values (build, tss_distance,
        tss_position, gene symbol, tissue system, transcript) before building
        the container.

        Reads ``self.tissue``, ``self.study``, ``self.datatype``,
        ``self.tss_dict`` and ``self.gene_json``.

        r2 and median_tpm values that cannot be read as a float (e.g. "NA")
        are stored as None. Any other malformed numeric column raises
        ValueError.
        """

        def _float_or_none(text: str) -> ty.Optional[float]:
            # Placeholder values cannot be converted; map them to None
            try:
                return float(text)
            except ValueError:
                return None

        fields: ty.List[ty.Any] = row.split("\t")
        # Revise if data format changes!
        # fields[1] = fields[1].replace("chr", "")  # chrom
        if self.tissue and self.study:
            # Tissue-and-study-specific files have two fewer columns (study and tissue),
            # and so the fields must be prepended to match the number of fields in the all-tissue file
            tissuevar = self.tissue
            fields = [self.study, tissuevar] + fields
        else:
            tissuevar = fields[1]

        # Field numbers. See also: https://github.com/eQTL-Catalogue/eQTL-Catalogue-resources/blob/master/tabix/Columns.md
        # 0: study
        # 1: tissue
        # 2: molecular_trait_id
        #  for spliceQTLs, this looks like 'ENSG00000008128.grp_1.contained.ENST00000356200'
        # 3: chromosome
        # 4: position (int)
        # 5: ref
        # 6: alt
        # 7: variant (chr_pos_ref_alt)
        # 8: ma_samples (int)
        # 9: maf (float)
        # 10: pvalue (float)
        # 11: beta (float)
        # 12: se (float)
        # 13: type (SNP, INDEL, etc)
        # 14: ac (allele count) (int)
        # 15: an (total number of alleles = 2 * sample size) (int)
        # 16: r2 (float)
        # 17: molecular_trait_object_id
        #  for spliceQTLs, this looks like 'ENSG00000008128.contained'
        # 18: gene_id (ENSG#)
        # 19: median_tpm (float)
        # 20: rsid
        if self.datatype == "ge":
            # The molecular trait id is discarded for the "ge" datatype
            fields[2] = None
        fields[4] = int(fields[4])  # pos
        fields[8] = int(fields[8])  # ma_samples
        fields[9] = float(fields[9])  # maf
        fields[10] = parser_utils.parse_pval_to_log(
            fields[10], is_neg_log=False
        )  # pvalue_nominal --> serialize as log
        fields[11] = float(fields[11])  # beta
        fields[12] = float(fields[12])  # stderr_beta
        fields[14] = int(fields[14])  # allele_count
        fields[15] = int(fields[15])  # total_number_of_alleles
        fields[16] = _float_or_none(fields[16])  # r2
        fields[19] = _float_or_none(fields[19])  # median_tpm

        # Append build
        build = "GRCh38"

        # Gene id without its version suffix (the part after the first ".")
        gene_id = fields[18].split(".")[0]

        # Append tss_distance. Genes missing from tss_dict give NaN values.
        # NOTE(review): the sign of the stored TSS presumably encodes the
        # strand - confirm against the code that builds tss_dict.
        gene_tss = self.tss_dict.get(gene_id, float("nan"))
        tss_distance = math.copysign(1, gene_tss) * (fields[4] - abs(gene_tss))
        tss_position = -abs(gene_tss)

        # Append gene symbol
        geneSymbol = self.gene_json.get(gene_id, "Unknown_gene")

        # Add tissue grouping and sample size from GTEx
        # tissue_data = TISSUE_DATA.get(tissuevar, ("Unknown_Tissue", None))
        # fields.extend(tissue_data)

        # Append system information
        tissueSystem = TISSUES_TO_SYSTEMS.get(tissuevar, "Unknown")
        if fields[2] is not None:
            # The trait id must have exactly four dot-separated parts;
            # the last one is kept as the transcript
            (_, _, _, transcript) = fields[2].split(".")
        else:
            transcript = None

        fields.extend(
            [
                build,
                tss_distance,
                tss_position,
                geneSymbol,
                tissueSystem,
                transcript,
            ]
        )
        return VariantContainer(*fields)
コード例 #43
0
 def test_small_ints(self):
     # Bug #3236: Return small longs from PyLong_FromString
     self.assertIs(int('10'), 10)
     self.assertIs(int('-1'), -1)
     self.assertIs(int(b'10'), 10)
     self.assertIs(int(b'-1'), -1)