Example #1
    def getAllPartiesFromContributions(self, table):
        self.display(table, 'initial table')
        recipientsTable = table['recipient_id']
        self.display(recipientsTable, 'recipientsTable')
        uniqueRecipientsTable = np.unique(recipientsTable)
        self.display(uniqueRecipientsTable, 'uniqueRecipientsTable')
        uniqueCandidateRecipientsTable = np.array(
            list(filter(lambda recipient: recipient.startswith('N'),
                        uniqueRecipientsTable)),
            dtype=[('recipient_id', 'O')])
        self.display(uniqueCandidateRecipientsTable,
                     'uniqueCandidateRecipientsTable')
        uniqueCommitteeRecipientsTable = np.array(
            list(filter(lambda recipient: recipient.startswith('C'),
                        uniqueRecipientsTable)),
            dtype=[('recipient_id', 'O')])
        self.display(uniqueCommitteeRecipientsTable,
                     'uniqueCommitteeRecipientsTable')

        candidatesTable = self.tables['candidates'].copy()
        self.display(candidatesTable, 'candidatesTable')
        candidatesTableColumns = [
            column for column in self.sourceData['candidates'][0]
        ]
        candidatesTableColumns[0] = 'recipient_id'
        candidatesTable.dtype.names = tuple(candidatesTableColumns)

        committeesTable = self.tables['committees'].copy()
        self.display(committeesTable, 'committeesTable')
        committeesTableColumns = [
            column for column in self.sourceData['committees'][0]
        ]
        committeesTableColumns[0] = 'recipient_id'
        committeesTable.dtype.names = tuple(committeesTableColumns)

        candidateRecipientsTable = rfn.join_by('recipient_id',
                                               candidatesTable,
                                               uniqueCandidateRecipientsTable,
                                               jointype='inner',
                                               usemask=False)
        self.display(candidateRecipientsTable, 'candidateRecipientsTable')

        committeeRecipientsTable = rfn.join_by('recipient_id',
                                               committeesTable,
                                               uniqueCommitteeRecipientsTable,
                                               jointype='inner',
                                               usemask=False)
        self.display(committeeRecipientsTable, 'committeeRecipientsTable')

        candidatePartiesTable = np.unique(candidateRecipientsTable['party'])
        self.display(candidatePartiesTable, 'candidatePartiesTable')
        committeePartiesTable = np.unique(committeeRecipientsTable['party'])
        self.display(committeePartiesTable, 'committeePartiesTable')
        return candidatePartiesTable, committeePartiesTable
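The column-list rebuilds above exist only to rename the first field to 'recipient_id' so both sides of the join share the key name. numpy.lib.recfunctions also provides rename_fields for this; a minimal sketch, with a made-up 'cand_id' field:

import numpy as np
import numpy.lib.recfunctions as rfn

candidates = np.array([(1, 'DEM')], dtype=[('cand_id', int), ('party', 'U3')])
# returns a new array whose first field is named 'recipient_id'
candidates = rfn.rename_fields(candidates, {'cand_id': 'recipient_id'})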
Example #2
 def _get_data(self, ip):
     # Read summary
     tshark = ('tshark -r %s '
               '-q -z icmpv6,srt,'
               'ipv6.addr==%s'
               '|tail -n5|head -n1') % (self.filename, ip)
     p = self.env.run_host(tshark)
     summary = p.stdout.readline().decode().split()
     # Get longest window
     tshark = (
         'tshark -r %s '
         '-T fields -e frame.time_relative -e icmpv6.echo.sequence_number '
         '"ipv6.addr==%s&&icmpv6.type==%i" '
         '-E separator=,')  # %(filename, ip, icmpv6.type)
     p = self.env.run_host(tshark % (self.filename, ip, 128))
     reqs = p.stdout.readlines()
     data = [
         numpy.fromstring(line.decode().strip(), dtype=float, sep=',')
         for line in reqs
     ]
     reqs = numpy.vstack(data) if data else numpy.array([[]])
     reqs.dtype = [('time_req', float), ('id', float)]
     p = self.env.run_host(tshark % (self.filename, ip, 129))
     reps = p.stdout.readlines()
     data = [
         numpy.fromstring(line.decode().strip(), dtype=float, sep=',')
         for line in reps
     ]
     reps = numpy.vstack(data) if data else numpy.array([[]])
     reps.dtype = [('time_rep', float), ('id', float)]
     max_offline = '<NA>'
     if reqs.size > 0:
         res = rf.join_by('id', reps, reqs, jointype='outer')
         # Find largest "True"
         max_offline = 0
         current_offline = 0
         last_sent = 0
         i = 0
         while i < res.size:
             #Offline window:
             while i < res.size and res.mask['time_rep'][i]:
                 i += 1
             if i < res.size:
                 current_offline = res.data['time_rep'][i] - last_sent
             else:
                 current_offline = res.data['time_req'][i - 1] - last_sent
             #Online window:
             while i < res.size and not res.mask['time_rep'][i]:
                 last_sent = res.data['time_req'][i]
                 i += 1
             if current_offline > max_offline:
                 max_offline = current_offline
                 current_offline += 1
     # Format data:
     data = dict()
     headers = ['Filename', 'Requests', 'Replies', 'Lost', 'Max_offline']
     values = [
         self.filename, summary[0], summary[1], summary[2], max_offline
     ]
     return dict(zip(headers, values))
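The window scan above relies on join_by's outer join returning a masked array when usemask is left at its default: fields coming from the side that lacks a key are masked. A standalone sketch of that behaviour, with made-up request/reply times:

import numpy as np
import numpy.lib.recfunctions as rf

reqs = np.array([(0.0, 1.0), (1.0, 2.0)],
                dtype=[('time_req', float), ('id', float)])
reps = np.array([(0.1, 1.0)],
                dtype=[('time_rep', float), ('id', float)])

res = rf.join_by('id', reps, reqs, jointype='outer')
print(res.mask['time_rep'])  # [False  True]: the second request got no reply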
Example #3
 def test_different_field_order(self):
     # gh-8940
     a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
     b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
     # this should not give a FutureWarning:
     j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False)
     assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2'])
Example #4
    def filter_effects(self):
        """
        Merge effects and data, and flip effect alleles 
        """
        effect_positions=self.effects[["CHR", "POS"]]
        data_positions=self.data.snp[["CHR", "POS"]]

        effect_include=np.in1d(effect_positions, data_positions)
        data_include=np.in1d(data_positions, effect_positions)

        self.data.filter_snps(data_include)
        self.effects=self.effects[effect_include]
        # Just give up and convert to float. I have no idea why int doesn't work here
        # but it's something to do with the fact that you can't have None as a numpy int
        # whereas float gets converted to nan.
        tmp_data=nprec.append_fields(self.data.snp, "GENO", None, dtypes=[(float,self.data.geno.shape[1])],usemask=False)
        tmp_data["GENO"]=self.data.geno
        self.effects=nprec.join_by(["CHR", "POS"], self.effects, tmp_data, usemask=False, jointype="inner")
        flipped=0
        removed=0
        for rec in self.effects:
            if rec["EFFECT"]==rec["REF"] and rec["OTHER"]==rec["ALT"]:
                pass
            elif rec["OTHER"]==rec["REF"] and rec["EFFECT"]==rec["ALT"]:
                flipped+=1
                rec["OTHER"]=rec["ALT"]
                rec["EFFECT"]=rec["REF"]
                rec["BETA"]=-rec["BETA"]
            else:
                removed+=1
                rec["EFFECT"]=rec["OTHER"]="N"

        self.effects=self.effects[self.effects["EFFECT"]!="N"]
        print( "Removed "+str(removed)+" non-matching alleles",file=sys.stderr)
        print( "Flipped "+str(flipped)+" alleles",file=sys.stderr)
Example #5
    def test_two_keys_two_vars(self):
        a = np.array(list(
            zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2),
                np.arange(50, 60), np.arange(10, 20))),
                     dtype=[('k', int), ('a', int), ('b', int), ('c', int)])

        b = np.array(list(
            zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2),
                np.arange(65, 75), np.arange(0, 10))),
                     dtype=[('k', int), ('a', int), ('b', int), ('c', int)])

        control = np.array([(10, 0, 50, 65, 10, 0), (11, 0, 51, 66, 11, 1),
                            (10, 1, 52, 67, 12, 2), (11, 1, 53, 68, 13, 3),
                            (10, 2, 54, 69, 14, 4), (11, 2, 55, 70, 15, 5),
                            (10, 3, 56, 71, 16, 6), (11, 3, 57, 72, 17, 7),
                            (10, 4, 58, 73, 18, 8), (11, 4, 59, 74, 19, 9)],
                           dtype=[('k', int), ('a', int), ('b1', int),
                                  ('b2', int), ('c1', int), ('c2', int)])
        test = join_by(['a', 'k'],
                       a,
                       b,
                       r1postfix='1',
                       r2postfix='2',
                       jointype='inner')
        assert_equal(test.dtype, control.dtype)
        assert_equal(test, control)
Example #6
def GetTileDefs(args, strtype='|S12'):
    #t = esutil.io.read(args.tiles)[args.tilecol][0:2]
    t = esutil.io.read(args.tiles)[args.tilecol]
    tindex = np.arange(len(t))
    tiles = np.empty(len(t), dtype=[('tilename',strtype), ('index', np.int64)])
    tiles['tilename'] = t.astype(strtype)
    tiles['index'] = np.arange(len(t))

    if args.density is not None:
        for tile in tiles['tilename']:
            outdir = os.path.join(args.outdir, tile)
            if not os.path.exists(outdir):
                os.makedirs(outdir)

    cur = desdb.connect()
    q = "select urall, uraur, udecll, udecur, tilename from coaddtile order by udecll desc, urall asc"
    arr = cur.quick(q, array=True)

    dt = arr.dtype.descr
    dt[-1] = ('tilename',strtype)
    dt = np.dtype(dt)
    newarr = np.empty(len(arr), dtype=dt)
    for i in range(len(arr.dtype.names)):
        name = arr.dtype.names[i]
        if i == 4:
            newarr[name] = arr[name].astype(strtype)
        else:
            newarr[name] = arr[name]

    tiles = rec.join_by('tilename', newarr, tiles, usemask=False)
    tiles = np.sort(tiles, order='index')
    return tiles
Example #7
    def test_two_keys_two_vars(self):
        a = np.array(
            list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), np.arange(50, 60), np.arange(10, 20))),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        b = np.array(
            list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), np.arange(65, 75), np.arange(0, 10))),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        control = np.array(
            [
                (10, 0, 50, 65, 10, 0),
                (11, 0, 51, 66, 11, 1),
                (10, 1, 52, 67, 12, 2),
                (11, 1, 53, 68, 13, 3),
                (10, 2, 54, 69, 14, 4),
                (11, 2, 55, 70, 15, 5),
                (10, 3, 56, 71, 16, 6),
                (11, 3, 57, 72, 17, 7),
                (10, 4, 58, 73, 18, 8),
                (11, 4, 59, 74, 19, 9),
            ],
            dtype=[("k", int), ("a", int), ("b1", int), ("b2", int), ("c1", int), ("c2", int)],
        )
        test = join_by(["a", "k"], a, b, r1postfix="1", r2postfix="2", jointype="inner")
        assert_equal(test.dtype, control.dtype)
        assert_equal(test, control)
Example #8
    def test_outer_join(self):
        a, b = self.a, self.b

        test = join_by(('a', 'b'), a, b, 'outer')
        control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1),
                            (2, 52, 102, -1), (3, 53, 103, -1),
                            (4, 54, 104, -1), (5, 55, 105, -1),
                            (5, 65, -1, 100), (6, 56, 106, -1),
                            (6, 66, -1, 101), (7, 57, 107, -1),
                            (7, 67, -1, 102), (8, 58, 108, -1),
                            (8, 68, -1, 103), (9, 59, 109, -1),
                            (9, 69, -1, 104), (10, 70, -1, 105),
                            (11, 71, -1, 106), (12, 72, -1, 107),
                            (13, 73, -1, 108), (14, 74, -1, 109)],
                           mask=[(0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 1, 0),
                                 (0, 0, 1, 0), (0, 0, 1, 0),
                                 (0, 0, 1, 0), (0, 0, 1, 0)],
                           dtype=[('a', int), ('b', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #9
def GetTileDefs(args, strtype='|S12'):
    #t = esutil.io.read(args.tiles)[args.tilecol][0:2]
    t = esutil.io.read(args.tiles)[args.tilecol]
    tindex = np.arange(len(t))
    tiles = np.empty(len(t),
                     dtype=[('tilename', strtype), ('index', np.int64)])
    tiles['tilename'] = t.astype(strtype)
    tiles['index'] = np.arange(len(t))

    if args.density is not None:
        for tile in tiles['tilename']:
            outdir = os.path.join(args.outdir, tile)
            if not os.path.exists(outdir):
                os.makedirs(outdir)

    cur = desdb.connect()
    q = "select urall, uraur, udecll, udecur, tilename from coaddtile order by udecll desc, urall asc"
    arr = cur.quick(q, array=True)

    dt = arr.dtype.descr
    dt[-1] = ('tilename', strtype)
    dt = np.dtype(dt)
    newarr = np.empty(len(arr), dtype=dt)
    for i in range(len(arr.dtype.names)):
        name = arr.dtype.names[i]
        if i == 4:
            newarr[name] = arr[name].astype(strtype)
        else:
            newarr[name] = arr[name]

    tiles = rec.join_by('tilename', newarr, tiles, usemask=False)
    tiles = np.sort(tiles, order='index')
    return tiles
Example #10
    def test_leftouter_join(self):
        a, b = self.a, self.b

        test = join_by(("a", "b"), a, b, "leftouter")
        control = ma.array(
            [
                (0, 50, 100, -1),
                (1, 51, 101, -1),
                (2, 52, 102, -1),
                (3, 53, 103, -1),
                (4, 54, 104, -1),
                (5, 55, 105, -1),
                (6, 56, 106, -1),
                (7, 57, 107, -1),
                (8, 58, 108, -1),
                (9, 59, 109, -1),
            ],
            mask=[
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
            ],
            dtype=[("a", int), ("b", int), ("c", int), ("d", int)],
        )
        assert_equal(test, control)
Example #11
    def get_joined_array(self, arr1, arr2):
        # https://stackoverflow.com/questions/23500754/numpy-how-to-outer-join-arrays

        if (len(arr1) > 0):
            box1 = arr1
            if len(arr2) > 0:
                box2 = arr2
            else:
                box2 = [box1[0]]
        else:
            if len(arr2) > 0:
                box2 = arr2
                box1 = [box2[0]]
            else:
                return None

        a3 = np.array(box1, dtype=[('col1', np.int8)])
        a2 = np.array(box2, dtype=[('col1', np.int8)])
        a1 = a3[0]

        result = a1

        for a in (a2, a3):
            cols = list(set(result.dtype.names).intersection(a.dtype.names))
            result = recfunctions.join_by(cols, result, a, jointype='outer')

        pr_fr_l = []
        for item in result:
            pr_fr_l.append(item[0])
        print(pr_fr_l)
        return pr_fr_l
Example #12
 def test_different_field_order(self):
     # gh-8940
     a = np.zeros(3, dtype=[("a", "i4"), ("b", "f4"), ("c", "u1")])
     b = np.ones(3, dtype=[("c", "u1"), ("b", "f4"), ("a", "i4")])
     # this should not give a FutureWarning:
     j = join_by(["c", "b"], a, b, jointype="inner", usemask=False)
     assert_equal(j.dtype.names, ["b", "c", "a1", "a2"])
Example #13
def merge_cort(data, cortisol_filename):
    
    cort_data = np.genfromtxt(cortisol_filename, dtype=None, names=True, delimiter='\t')
    
    names = list(cort_data.dtype.names)
    
    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]

    cort_data = nprf.drop_fields(cort_data, drop_names, usemask=False, asrecarray=True)
    
    data = nprf.join_by('SubID', data, cort_data, jointype='leftouter',
                            r1postfix='KW', r2postfix='KW2', usemask=False,asrecarray=True)
    
    # Bizarrely, the join_by function pads with the biggest numbers it can think of!
    # So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name]>999] = 999

    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack( [ data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1
    
    data = nprf.append_fields(base = data, names='UsableCort', data = usable_cort_array, usemask=False)

    return data
Example #14
    def test_outer_join(self):
        a, b = self.a, self.b

        test = join_by(('a', 'b'), a, b, 'outer')
        control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1),
                            (2, 52, 102, -1), (3, 53, 103, -1),
                            (4, 54, 104, -1), (5, 55, 105, -1),
                            (5, 65, -1, 100), (6, 56, 106, -1),
                            (6, 66, -1, 101), (7, 57, 107, -1),
                            (7, 67, -1, 102), (8, 58, 108, -1),
                            (8, 68, -1, 103), (9, 59, 109, -1),
                            (9, 69, -1, 104), (10, 70, -1, 105),
                            (11, 71, -1, 106), (12, 72, -1, 107),
                            (13, 73, -1, 108), (14, 74, -1, 109)],
                           mask=[(0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 0, 1),
                                 (0, 0, 1, 0), (0, 0, 1, 0),
                                 (0, 0, 1, 0), (0, 0, 1, 0),
                                 (0, 0, 1, 0), (0, 0, 1, 0)],
                           dtype=[('a', int), ('b', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #15
    def test_no_r2postfix(self):
        # Basic test of join_by no_r2postfix
        a, b = self.a, self.b

        test = join_by("a",
                       a,
                       b,
                       r1postfix="1",
                       r2postfix="",
                       jointype="inner")
        control = np.array(
            [
                (0, 50, 65, 100, 100),
                (1, 51, 66, 101, 101),
                (2, 52, 67, 102, 102),
                (3, 53, 68, 103, 103),
                (4, 54, 69, 104, 104),
                (5, 55, 70, 105, 105),
                (6, 56, 71, 106, 106),
                (7, 57, 72, 107, 107),
                (8, 58, 73, 108, 108),
                (9, 59, 74, 109, 109),
            ],
            dtype=[("a", int), ("b1", int), ("b", int), ("c", int),
                   ("d", int)],
        )
        assert_equal(test, control)
Example #16
 def test_join_subdtype(self):
     # tests the bug in https://stackoverflow.com/q/44769632/102441
     foo = np.array([(1, )], dtype=[('key', int)])
     bar = np.array([(1, np.array([1, 2, 3]))],
                    dtype=[('key', int), ('value', 'uint16', 3)])
     res = join_by('key', foo, bar)
     assert_equal(res, bar.view(ma.MaskedArray))
Example #17
 def test_different_field_order(self):
     # gh-8940
     a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
     b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
     # this should not give a FutureWarning:
     j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False)
     assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2'])
Example #18
    def test_leftouter_join(self):
        a, b = self.a, self.b

        test = join_by(("a", "b"), a, b, "leftouter")
        control = ma.array(
            [
                (0, 50, 100, -1),
                (1, 51, 101, -1),
                (2, 52, 102, -1),
                (3, 53, 103, -1),
                (4, 54, 104, -1),
                (5, 55, 105, -1),
                (6, 56, 106, -1),
                (7, 57, 107, -1),
                (8, 58, 108, -1),
                (9, 59, 109, -1),
            ],
            mask=[
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
            ],
            dtype=[("a", int), ("b", int), ("c", int), ("d", int)],
        )
        assert_equal(test, control)
Example #19
    def test_join(self):
        a, b = self.a, self.b

        # Fixme, this test is broken
        #test = join_by(('a', 'b'), a, b)
        #control = np.array([(5, 55, 105, 100), (6, 56, 106, 101),
        #                    (7, 57, 107, 102), (8, 58, 108, 103),
        #                    (9, 59, 109, 104)],
        #                   dtype=[('a', int), ('b', int),
        #                          ('c', int), ('d', int)])
        #assert_equal(test, control)

        # Hack to avoid pyflakes unused variable warnings
        join_by(('a', 'b'), a, b)
        np.array([(5, 55, 105, 100), (6, 56, 106, 101), (7, 57, 107, 102),
                  (8, 58, 108, 103), (9, 59, 109, 104)],
                 dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
Example #20
 def test_join_subdtype(self):
     # tests the bug in https://stackoverflow.com/q/44769632/102441
     from numpy.lib import recfunctions as rfn
     foo = np.array([(1,)],
                    dtype=[('key', int)])
     bar = np.array([(1, np.array([1,2,3]))],
                    dtype=[('key', int), ('value', 'uint16', 3)])
     res = join_by('key', foo, bar)
     assert_equal(res, bar.view(ma.MaskedArray))
Example #21
    def test_join_subdtype(self):
        # tests the bug in https://stackoverflow.com/q/44769632/102441
        from numpy.lib import recfunctions as rfn

        foo = np.array([(1, )], dtype=[("key", int)])
        bar = np.array([(1, np.array([1, 2, 3]))],
                       dtype=[("key", int), ("value", "uint16", 3)])
        res = join_by("key", foo, bar)
        assert_equal(res, bar.view(ma.MaskedArray))
Example #22
    def test_join(self):
        a, b = self.a, self.b

        # Fixme, this test is broken
        # test = join_by(('a', 'b'), a, b)
        # control = np.array([(5, 55, 105, 100), (6, 56, 106, 101),
        #                    (7, 57, 107, 102), (8, 58, 108, 103),
        #                    (9, 59, 109, 104)],
        #                   dtype=[('a', int), ('b', int),
        #                          ('c', int), ('d', int)])
        # assert_equal(test, control)

        # Hack to avoid pyflakes unused variable warnings
        join_by(("a", "b"), a, b)
        np.array(
            [(5, 55, 105, 100), (6, 56, 106, 101), (7, 57, 107, 102), (8, 58, 108, 103), (9, 59, 109, 104)],
            dtype=[("a", int), ("b", int), ("c", int), ("d", int)],
        )
Example #23
    def test_inner_join(self):
        # Basic test of join_by
        a, b = self.a, self.b

        test = join_by('a', a, b, jointype='inner')
        control = np.array([(5, 55, 65, 105, 100), (6, 56, 66, 106, 101),
                            (7, 57, 67, 107, 102), (8, 58, 68, 108, 103),
                            (9, 59, 69, 109, 104)],
                           dtype=[('a', int), ('b1', int), ('b2', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #24
    def test_same_name_different_dtypes_key(self):
        a_dtype = np.dtype([('key', 'S5'), ('value', '<f4')])
        b_dtype = np.dtype([('key', 'S10'), ('value', '<f4')])
        expected_dtype = np.dtype([
            ('key', 'S10'), ('value1', '<f4'), ('value2', '<f4')])

        a = np.array([('Sarah',  8.0), ('John', 6.0)], dtype=a_dtype)
        b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype)
        res = join_by('key', a, b)

        assert_equal(res.dtype, expected_dtype)
Example #25
    def test_same_name_different_dtypes_key(self):
        a_dtype = np.dtype([("key", "S5"), ("value", "<f4")])
        b_dtype = np.dtype([("key", "S10"), ("value", "<f4")])
        expected_dtype = np.dtype([("key", "S10"), ("value1", "<f4"),
                                   ("value2", "<f4")])

        a = np.array([("Sarah", 8.0), ("John", 6.0)], dtype=a_dtype)
        b = np.array([("Sarah", 10.0), ("John", 7.0)], dtype=b_dtype)
        res = join_by("key", a, b)

        assert_equal(res.dtype, expected_dtype)
Example #26
    def test_inner_join(self):
        # Basic test of join_by
        a, b = self.a, self.b

        test = join_by('a', a, b, jointype='inner')
        control = np.array([(5, 55, 65, 105, 100), (6, 56, 66, 106, 101),
                            (7, 57, 67, 107, 102), (8, 58, 68, 108, 103),
                            (9, 59, 69, 109, 104)],
                           dtype=[('a', int), ('b1', int), ('b2', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #27
    def test_same_name_different_dtypes_key(self):
        a_dtype = np.dtype([('key', 'S5'), ('value', '<f4')])
        b_dtype = np.dtype([('key', 'S10'), ('value', '<f4')])
        expected_dtype = np.dtype([
            ('key', 'S10'), ('value1', '<f4'), ('value2', '<f4')])

        a = np.array([('Sarah',  8.0), ('John', 6.0)], dtype=a_dtype)
        b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype)
        res = join_by('key', a, b)

        assert_equal(res.dtype, expected_dtype)
Example #28
    def test_two_keys_two_vars(self):
        a = np.array(
            list(
                zip(
                    np.tile([10, 11], 5),
                    np.repeat(np.arange(5), 2),
                    np.arange(50, 60),
                    np.arange(10, 20),
                )),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        b = np.array(
            list(
                zip(
                    np.tile([10, 11], 5),
                    np.repeat(np.arange(5), 2),
                    np.arange(65, 75),
                    np.arange(0, 10),
                )),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        control = np.array(
            [
                (10, 0, 50, 65, 10, 0),
                (11, 0, 51, 66, 11, 1),
                (10, 1, 52, 67, 12, 2),
                (11, 1, 53, 68, 13, 3),
                (10, 2, 54, 69, 14, 4),
                (11, 2, 55, 70, 15, 5),
                (10, 3, 56, 71, 16, 6),
                (11, 3, 57, 72, 17, 7),
                (10, 4, 58, 73, 18, 8),
                (11, 4, 59, 74, 19, 9),
            ],
            dtype=[
                ("k", int),
                ("a", int),
                ("b1", int),
                ("b2", int),
                ("c1", int),
                ("c2", int),
            ],
        )
        test = join_by(["a", "k"],
                       a,
                       b,
                       r1postfix="1",
                       r2postfix="2",
                       jointype="inner")
        assert_equal(test.dtype, control.dtype)
        assert_equal(test, control)
Example #29
    def test_subarray_key(self):
        a_dtype = np.dtype([('pos', int, 3), ('f', '<f4')])
        a = np.array([([1, 1, 1], np.pi), ([1, 2, 3], 0.0)], dtype=a_dtype)

        b_dtype = np.dtype([('pos', int, 3), ('g', '<f4')])
        b = np.array([([1, 1, 1], 3), ([3, 2, 1], 0.0)], dtype=b_dtype)

        expected_dtype = np.dtype([('pos', int, 3), ('f', '<f4'), ('g', '<f4')])
        expected = np.array([([1, 1, 1], np.pi, 3)], dtype=expected_dtype)

        res = join_by('pos', a, b)
        assert_equal(res.dtype, expected_dtype)
        assert_equal(res, expected)
Example #30
    def test_padded_dtype(self):
        dt = np.dtype('i1,f4', align=True)
        dt.names = ('k', 'v')
        assert_equal(len(dt.descr), 3)  # padding field is inserted

        a = np.array([(1, 3), (3, 2)], dt)
        b = np.array([(1, 1), (2, 2)], dt)
        res = join_by('k', a, b)

        # no padding fields remain
        expected_dtype = np.dtype([('k', 'i1'), ('v1', 'f4'), ('v2', 'f4')])

        assert_equal(res.dtype, expected_dtype)
Example #31
    def augment_effects(self):
        """
        Add the population frequency information to the effects. 
        """

        tmp_snp=nprec.append_fields(self.data.snp, "FREQ", None, 
                                    dtypes=[(float,self.data.freq.shape[1])],
                                    usemask=False)
        tmp_snp["FREQ"]=self.data.freq
        
        new_effects=nprec.join_by(["CHR", "POS"], self.effects, tmp_snp,
                                  usemask=False, jointype="inner")
        self.effects=new_effects
Example #32
    def test_subarray_key(self):
        a_dtype = np.dtype([('pos', int, 3), ('f', '<f4')])
        a = np.array([([1, 1, 1], np.pi), ([1, 2, 3], 0.0)], dtype=a_dtype)

        b_dtype = np.dtype([('pos', int, 3), ('g', '<f4')])
        b = np.array([([1, 1, 1], 3), ([3, 2, 1], 0.0)], dtype=b_dtype)

        expected_dtype = np.dtype([('pos', int, 3), ('f', '<f4'), ('g', '<f4')])
        expected = np.array([([1, 1, 1], np.pi, 3)], dtype=expected_dtype)

        res = join_by('pos', a, b)
        assert_equal(res.dtype, expected_dtype)
        assert_equal(res, expected)
Example #33
    def test_padded_dtype(self):
        dt = np.dtype("i1,f4", align=True)
        dt.names = ("k", "v")
        assert_equal(len(dt.descr), 3)  # padding field is inserted

        a = np.array([(1, 3), (3, 2)], dt)
        b = np.array([(1, 1), (2, 2)], dt)
        res = join_by("k", a, b)

        # no padding fields remain
        expected_dtype = np.dtype([("k", "i1"), ("v1", "f4"), ("v2", "f4")])

        assert_equal(res.dtype, expected_dtype)
Example #34
    def test_subarray_key(self):
        a_dtype = np.dtype([("pos", int, 3), ("f", "<f4")])
        a = np.array([([1, 1, 1], np.pi), ([1, 2, 3], 0.0)], dtype=a_dtype)

        b_dtype = np.dtype([("pos", int, 3), ("g", "<f4")])
        b = np.array([([1, 1, 1], 3), ([3, 2, 1], 0.0)], dtype=b_dtype)

        expected_dtype = np.dtype([("pos", int, 3), ("f", "<f4"),
                                   ("g", "<f4")])
        expected = np.array([([1, 1, 1], np.pi, 3)], dtype=expected_dtype)

        res = join_by("pos", a, b)
        assert_equal(res.dtype, expected_dtype)
        assert_equal(res, expected)
Example #35
    def test_no_r2postfix(self):
        # Basic test of join_by no_r2postfix
        a, b = self.a, self.b

        test = join_by(
            'a', a, b, r1postfix='1', r2postfix='', jointype='inner')
        control = np.array([(0, 50, 65, 100, 100), (1, 51, 66, 101, 101),
                            (2, 52, 67, 102, 102), (3, 53, 68, 103, 103),
                            (4, 54, 69, 104, 104), (5, 55, 70, 105, 105),
                            (6, 56, 71, 106, 106), (7, 57, 72, 107, 107),
                            (8, 58, 73, 108, 108), (9, 59, 74, 109, 109)],
                           dtype=[('a', int), ('b1', int), ('b', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #36
    def test_leftouter_join(self):
        a, b = self.a, self.b

        test = join_by(('a', 'b'), a, b, 'leftouter')
        control = ma.array(
            [(0, 50, 100, -1), (1, 51, 101, -1), (2, 52, 102, -1),
             (3, 53, 103, -1), (4, 54, 104, -1), (5, 55, 105, -1),
             (6, 56, 106, -1), (7, 57, 107, -1), (8, 58, 108, -1),
             (9, 59, 109, -1)],
            mask=[(0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1),
                  (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1),
                  (0, 0, 0, 1), (0, 0, 0, 1)],
            dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
        assert_equal(test, control)
Example #37
    def test_no_r2postfix(self):
        # Basic test of join_by no_r2postfix
        a, b = self.a, self.b

        test = join_by(
            'a', a, b, r1postfix='1', r2postfix='', jointype='inner')
        control = np.array([(0, 50, 65, 100, 100), (1, 51, 66, 101, 101),
                            (2, 52, 67, 102, 102), (3, 53, 68, 103, 103),
                            (4, 54, 69, 104, 104), (5, 55, 70, 105, 105),
                            (6, 56, 71, 106, 106), (7, 57, 72, 107, 107),
                            (8, 58, 73, 108, 108), (9, 59, 74, 109, 109)],
                           dtype=[('a', int), ('b1', int), ('b', int),
                                  ('c', int), ('d', int)])
        assert_equal(test, control)
Example #38
	def join_by(self, r2, key, jointype='inner', r1postfix='1', r2postfix='2',
                defaults=None, asrecarray=False, asTable=True):
		"""
		Join arrays `r1` and `r2` on key `key`.

		The key should be either a string or a sequence of strings corresponding
		to the fields used to join the arrays.
		An exception is raised if the `key` field cannot be found in the two input
		arrays.
		Neither `r1` nor `r2` should have any duplicates along `key`: the presence
		of duplicates will make the output quite unreliable. Note that duplicates
		are not looked for by the algorithm.

		INPUTS:
			key 	   {str, seq} 	A string or a sequence of strings
						corresponding to the fields used for comparison.
			r2 	   [Table]	Table to join with

		KEYWORDS:
			jointype   [str]	{'inner', 'outer', 'leftouter'}
			    'inner'     : returns the elements common to both r1 and r2.
			    'outer'     : returns the common elements as well as the elements
					  of r1 not in r2 and the elements of r2 not in r1.
			    'leftouter' : returns the common elements and the elements of r1 not in r2.

			r1postfix  [str]	String appended to the names of the fields of r1 that are present in r2
			r2postfix  [str] 	String appended to the names of the fields of r2 that are present in r1
			defaults   [dict]	Dictionary mapping field names to the corresponding default values.
			asrecarray [bool] 	Whether to return a recarray or just a flexible-type ndarray.
			asTable	   [bool] 	Whether to return a Table (default).

		*Notes*:
		- The output is sorted along the key.
		- A temporary array is formed by dropping the fields not in the key for the
		  two arrays and concatenating the result. This array is then sorted, and
		  the common entries selected. The output is constructed by filling the fields
		  with the selected entries. Matching is not preserved if there are some
		  duplicates...

		"""
		#TODO: return a Table by default
		if asTable:
			asrecarray = True
		arr = recfunctions.join_by(key, self, r2, jointype=jointype,
				r1postfix=r1postfix, r2postfix=r2postfix,
				defaults=defaults, usemask=False,
				asrecarray=asrecarray)

		return arr
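The jointype values documented above are numpy.lib.recfunctions.join_by's own; a minimal standalone sketch of the three modes, independent of this Table wrapper:

import numpy as np
from numpy.lib import recfunctions

r1 = np.array([(1, 10.0), (2, 20.0)], dtype=[('key', int), ('x', float)])
r2 = np.array([(2, 0.5), (3, 0.7)], dtype=[('key', int), ('y', float)])

inner = recfunctions.join_by('key', r1, r2, jointype='inner')      # key 2 only
left = recfunctions.join_by('key', r1, r2, jointype='leftouter')   # keys 1 and 2
outer = recfunctions.join_by('key', r1, r2, jointype='outer')      # keys 1, 2 and 3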
Example #39
def merge(left_array: np.ndarray,
          right_array: np.ndarray,
          left_on: str,
          right_on: str,
          how: "{inner, outer, leftouter}" = "inner",
          left_postscript="_left",
          right_postscript="_right") -> np.ndarray:
    """
    Merge data on unique key columns, with a choice of join types. This method is not optimised and makes use of NumPy's recfunctions.
    It mirrors the core behaviour of Pandas' merge function.

    :param left_array: np.ndarray, the left concatenating array.
    :param right_array: np.ndarray, the right concatenating array.
    :param left_on: str, the left unique column to merge on.
    :param right_on: str, the right unique column to merge on.
    :param how: {inner, outer, leftouter} str,
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    :param left_postscript: str, appended to the names of the fields of left_array that are present
        in right_array but absent from the key.
    :param right_postscript: str, appended to the names of the fields of right_array that are present
        in left_array but absent from the key.
    :return : np.ndarray, newly merged array.
    """

    # DATA
    if how not in ["inner", "outer", "leftouter"]:
        raise ValueError(
            "how has to be set to either: 'inner','outer','leftouter'")
    if left_on != right_on:
        if left_on in right_array.dtype.names:
            right_array = drop(right_array, left_on)

        mapping = {right_on: left_on}
        # LOGIC
        right_array.dtype.names = [
            mapping.get(word, word) for word in right_array.dtype.names
        ]

    return rfn.join_by(left_on,
                       left_array,
                       right_array,
                       jointype=how,
                       usemask=False,
                       r1postfix=left_postscript,
                       r2postfix=right_postscript)
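A hypothetical call with made-up arrays and field names, assuming the module imports numpy as np; merge renames right_on to left_on internally and then delegates to join_by:

import numpy as np

left = np.array([(1, 10.0), (2, 20.0)], dtype=[('id', int), ('price', float)])
right = np.array([(2, 5.0), (3, 7.0)], dtype=[('key', int), ('qty', float)])

# 'key' is renamed to 'id' inside merge, then the arrays are inner-joined on 'id'
merged = merge(left, right, left_on='id', right_on='key', how='inner')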
Example #40
    def test_padded_dtype(self):
        dt = np.dtype('i1,f4', align=True)
        dt.names = ('k', 'v')
        assert_equal(len(dt.descr), 3)  # padding field is inserted

        a = np.array([(1, 3), (3, 2)], dt)
        b = np.array([(1, 1), (2, 2)], dt)
        res = join_by('k', a, b)

        # no padding fields remain
        expected_dtype = np.dtype([
            ('k', 'i1'), ('v1', 'f4'), ('v2', 'f4')
        ])

        assert_equal(res.dtype, expected_dtype)
Example #41
def build_utc_array(source, sink, start, end):

    source_prices = retrieve_node_data(source, start, end)
    sink_prices = retrieve_node_data(sink, start, end)

    source_data = []

    for element in source_prices:
        source_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                            element[1],
                            element[2],
                            element[5]))

    sink_data = []

    for element in sink_prices:
        sink_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                          element[1],
                          element[2],
                          element[5]))

    sink_dt = numpy.dtype([('time_id', 'S32'),
                      ('sink_node_id', 'i8'),
                      ('sink_rt_lmp', 'f8'),
                      ('sink_da_lmp', 'f8')])

    source_dt = numpy.dtype([('time_id', 'S32'),
                      ('source_node_id', 'i8'),
                      ('source_rt_lmp', 'f8'),
                      ('source_da_lmp', 'f8')])


    sink_array = numpy.array(sink_data, dtype=sink_dt)
    source_array = numpy.array(source_data, dtype=source_dt)

    joined = rfn.join_by('time_id', sink_array,
                                    source_array,
                                    jointype='inner', usemask=False)

    rt_congestion_rounded = numpy.round(joined['sink_rt_lmp'] - joined['source_rt_lmp'], 2)
    da_congestion_rounded = numpy.round(joined['sink_da_lmp'] - joined['source_da_lmp'], 2)
    profit_rounded = numpy.round(rt_congestion_rounded - da_congestion_rounded, 2)

    joined = rfn.append_fields(joined, 'rt_congestion', data=rt_congestion_rounded)
    joined = rfn.append_fields(joined, 'da_congestion', data=da_congestion_rounded)
    joined = rfn.append_fields(joined, 'profit', data=profit_rounded)

    return joined[['time_id', 'rt_congestion']]
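One quirk worth noting for the append_fields calls above: like join_by, append_fields returns a masked array unless usemask=False is passed, so the value returned here carries a mask. A minimal sketch of the unmasked variant:

import numpy as np
import numpy.lib.recfunctions as rfn

arr = np.array([(1,)], dtype=[('a', int)])
arr = rfn.append_fields(arr, 'b', data=[2.0], usemask=False)  # plain ndarray, not masked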
Example #42
    def test_inner_join(self):
        # Basic test of join_by
        a, b = self.a, self.b

        test = join_by("a", a, b, jointype="inner")
        control = np.array(
            [
                (5, 55, 65, 105, 100),
                (6, 56, 66, 106, 101),
                (7, 57, 67, 107, 102),
                (8, 58, 68, 108, 103),
                (9, 59, 69, 109, 104),
            ],
            dtype=[("a", int), ("b1", int), ("b2", int), ("c", int), ("d", int)],
        )
        assert_equal(test, control)
Example #43
    def test_leftouter_join(self):
        a, b = self.a, self.b

        test = join_by(('a', 'b'), a, b, 'leftouter')
        control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1),
                            (2, 52, 102, -1), (3, 53, 103, -1),
                            (4, 54, 104, -1), (5, 55, 105, -1),
                            (6, 56, 106, -1), (7, 57, 107, -1),
                            (8, 58, 108, -1), (9, 59, 109, -1)],
                           mask=[(0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1)],
                           dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
        assert_equal(test, control)
Example #44
    def test_inner_join(self):
        # Basic test of join_by
        a, b = self.a, self.b

        test = join_by("a", a, b, jointype="inner")
        control = np.array(
            [
                (5, 55, 65, 105, 100),
                (6, 56, 66, 106, 101),
                (7, 57, 67, 107, 102),
                (8, 58, 68, 108, 103),
                (9, 59, 69, 109, 104),
            ],
            dtype=[("a", int), ("b1", int), ("b2", int), ("c", int),
                   ("d", int)],
        )
        assert_equal(test, control)
Example #45
def CalcDistArray(inval):
    '''
    Calculate Distance Array using the Generate Near Function.
    
    Called By:
    CalcDistancesLayer
    CalcDistanceLayerMultiple
    
    Calls:
    
    Arguments:
    inval = [UPConfig,layername]
    
    
    Returns:
    Distarray: [OBJECTID, distance, BaseGeom_id, attracter]
    
    '''

    UPConfig = inval[0]
    layername = inval[1]

    gn_table = arcpy.GenerateNearTable_analysis(
        os.path.join(UPConfig['paths']['dbpath'], UPConfig['paths']['dbname'],
                     UPConfig['BaseGeom_cent']),
        os.path.join(UPConfig['paths']['dbpath'], UPConfig['paths']['dbname'],
                     layername), 'in_memory/temp_up_dist', "", "", "",
        "CLOSEST")
    # Convert gn_table to a Numpy Array
    gn_array = arcpy.da.TableToNumPyArray(gn_table, ['IN_FID', 'NEAR_DIST'])
    desc = arcpy.Describe(
        os.path.join(UPConfig['paths']['dbpath'], UPConfig['paths']['dbname'],
                     UPConfig['BaseGeom_cent']))
    oidfieldname = desc.OIDFieldName
    gn_array.dtype.names = str(oidfieldname), 'distance'
    bg_array = arcpy.da.TableToNumPyArray(
        os.path.join(UPConfig['paths']['dbpath'], UPConfig['paths']['dbname'],
                     UPConfig['BaseGeom_cent']),
        [oidfieldname, UPConfig['BaseGeom_id']])
    arr = rfn.join_by(oidfieldname, gn_array, bg_array, 'outer')
    arr = AddNumpyField(arr, [('attracter', '<a50')])
    for ln in arr:
        ln['attracter'] = layername
    arcpy.Delete_management('in_memory/temp_up_dist')
    return (arr)
Example #46
def MergeArrays(arrlist, joinfldname):
    '''
    Merge a list of arrays into a single array based on a common field and return
    
    Called by:
    
    
    Arguments:
    arrlist: a list of numpy arrays
    joinfldname: the name of the field to use for the join
    
    '''
    oarr = arrlist[0]
    for res in arrlist[1:]:
        #Logger("Merging: dist_{ts}_{att}".format(att=res[0],ts = res[1][0]))
        arr = res
        oarr = rfn.join_by(str(joinfldname), oarr, arr, 'outer')
    return (oarr)
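A hypothetical call, assuming three structured arrays that share an 'id' field; since the join is 'outer', ids absent from one of the inputs come back masked:

import numpy as np

a = np.array([(1, 10)], dtype=[('id', int), ('x', int)])
b = np.array([(1, 20)], dtype=[('id', int), ('y', int)])
c = np.array([(2, 30)], dtype=[('id', int), ('z', int)])

merged = MergeArrays([a, b, c], 'id')  # rows for ids 1 and 2, missing fields masked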
Example #47
def merge_cort(data, cortisol_filename):

    cort_data = np.genfromtxt(cortisol_filename,
                              dtype=None,
                              names=True,
                              delimiter='\t')

    names = list(cort_data.dtype.names)

    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]

    cort_data = nprf.drop_fields(cort_data,
                                 drop_names,
                                 usemask=False,
                                 asrecarray=True)

    data = nprf.join_by('SubID',
                        data,
                        cort_data,
                        jointype='leftouter',
                        r1postfix='KW',
                        r2postfix='KW2',
                        usemask=False,
                        asrecarray=True)

    # Bizarrely, the join_by function pads with the biggest numbers it can think of!
    # So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name] > 999] = 999

    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack([data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1

    data = nprf.append_fields(base=data,
                              names='UsableCort',
                              data=usable_cort_array,
                              usemask=False)

    return data
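The "biggest numbers" the comment above complains about are the fill values join_by substitutes for unmatched rows once usemask=False removes the mask. Passing defaults sets those fill values up front and makes the clamping loop unnecessary; a minimal sketch with made-up fields:

import numpy as np
import numpy.lib.recfunctions as nprf

subjects = np.array([(1, 25), (2, 31)], dtype=[('SubID', int), ('age', int)])
cort = np.array([(1, 3.5)], dtype=[('SubID', int), ('cort_av', float)])

joined = nprf.join_by('SubID', subjects, cort, jointype='leftouter',
                      usemask=False, defaults={'cort_av': 999})
# SubID 2 has no cortisol row, so its cort_av is filled with 999 rather than
# the dtype's maximum value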
Example #48
    def test_two_keys_two_vars(self):
        a = np.array(list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2),
                              np.arange(50, 60), np.arange(10, 20))),
                     dtype=[('k', int), ('a', int), ('b', int), ('c', int)])

        b = np.array(list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2),
                              np.arange(65, 75), np.arange(0, 10))),
                     dtype=[('k', int), ('a', int), ('b', int), ('c', int)])

        control = np.array([(10, 0, 50, 65, 10, 0), (11, 0, 51, 66, 11, 1),
                            (10, 1, 52, 67, 12, 2), (11, 1, 53, 68, 13, 3),
                            (10, 2, 54, 69, 14, 4), (11, 2, 55, 70, 15, 5),
                            (10, 3, 56, 71, 16, 6), (11, 3, 57, 72, 17, 7),
                            (10, 4, 58, 73, 18, 8), (11, 4, 59, 74, 19, 9)],
                           dtype=[('k', int), ('a', int), ('b1', int),
                                  ('b2', int), ('c1', int), ('c2', int)])
        test = join_by(
            ['a', 'k'], a, b, r1postfix='1', r2postfix='2', jointype='inner')
        assert_equal(test.dtype, control.dtype)
        assert_equal(test, control)
Example #49
def _desc_dither_columns(data, dithers):
    logging.info('adding dithers')
    d = join_by('observationId',
                data,
                dithers,
                jointype='inner',
                defaults={
                    'descDitheredRA': 0.,
                    'descDitheredDec': 0.,
                    'descDitheredRotTelPos': 0.
                },
                usemask=False)
    #    nm = []
    #    for nn in d.dtype.names:
    #        if nn == 'observationId':
    #            nm.append(nn)
    #        else:
    #            nm.append(nn[:-1])
    #    d.dtype.names = nm
    d['rotTelPos'] = d['descDitheredRotTelPos']
    return d
Example #50
    def test_no_r2postfix(self):
        # Basic test of join_by no_r2postfix
        a, b = self.a, self.b

        test = join_by("a", a, b, r1postfix="1", r2postfix="", jointype="inner")
        control = np.array(
            [
                (0, 50, 65, 100, 100),
                (1, 51, 66, 101, 101),
                (2, 52, 67, 102, 102),
                (3, 53, 68, 103, 103),
                (4, 54, 69, 104, 104),
                (5, 55, 70, 105, 105),
                (6, 56, 71, 106, 106),
                (7, 57, 72, 107, 107),
                (8, 58, 73, 108, 108),
                (9, 59, 74, 109, 109),
            ],
            dtype=[("a", int), ("b1", int), ("b", int), ("c", int), ("d", int)],
        )
        assert_equal(test, control)
Example #51
    def filter_effects_against_data(self):
        """
        Filter out all the effect SNPs that are not in the dataset. Also flip
        the alleles so that the EFFECT allele is the REF allele.
        """
        # First, filter alleles: 
        new_effects=nprec.join_by(["CHR", "POS"], self.effects, 
                                  self.data.snp[["CHR", "POS", "REF", "ALT"]],
                                  usemask=False, jointype="inner")

        print( "Removed "+str(len(self.effects)-len(new_effects))+
               " effect SNPS not in data",file=sys.stderr)
        if not len(new_effects):
            raise Exception("No effect SNPs in reference data")

        if any(new_effects[("EFFECT")]=="N") or any(new_effects[("OTHER")]=="N"):
            raise Exception("Effects corrupted. Either .gwas file or frequency file is bad (possibly contains duplicates)")

        flipped=0
        removed=0
        for rec in new_effects:
            if rec["EFFECT"]==rec["REF"] and rec["OTHER"]==rec["ALT"]:
                pass
            elif rec["OTHER"]==rec["REF"] and rec["EFFECT"]==rec["ALT"]:
                flipped+=1
                rec["OTHER"]=rec["ALT"]
                rec["EFFECT"]=rec["REF"]
                rec["BETA"]=-rec["BETA"]
            else:
                removed+=1
                rec["EFFECT"]=rec["OTHER"]="N"

        new_effects=new_effects[new_effects["EFFECT"]!="N"]
        print( "Removed "+str(removed)+" non-matching alleles",file=sys.stderr)
        print( "Flipped "+str(flipped)+" alleles",file=sys.stderr)

        self.effects=new_effects[["CHR", "POS", "EFFECT", "OTHER", "BETA"]]
Example #52

data_all = np.copy(data)


#================================================================================================


result, indexes = np.unique(ngc_to_messier['ngc'], return_index=True)
ngc_to_messier = ngc_to_messier[indexes]

result, indexes = np.unique(data['ngc'], return_index=True)
data = data[indexes]


data = rfn.join_by('ngc', data, ngc_to_messier, jointype='leftouter', usemask=False)

data = data[(data["type"] == "OC") | (data["type"] == "C+N") | (data["ngc"] == PLEIADES_MAGIC_ID) | (data["ngc"] == HYADES_MAGIC_ID) | (data["ngc"]==ORION_NEBULA_NGC)]

data["messier"][data["ngc"] == PLEIADES_MAGIC_ID] = "M  45 (pleiades)"
data["messier"][data["ngc"] == HYADES_MAGIC_ID] = "hyades"
data["messier"][data["messier"] == "M  44"] = "M  44 (beehive)"
data["messier"][data["messier"] == "M  42"] = "M  42 (orion nb)"

#================================================================================================


data = np.sort(data, order=['messier'])


#================================================================================================
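The np.unique(..., return_index=True) passes above (and in the next example) matter because join_by assumes unique keys on both sides; with duplicate keys its output is unreliable. The pattern in isolation:

import numpy as np

data = np.array([(1, 'a'), (1, 'b'), (2, 'c')],
                dtype=[('ngc', int), ('v', 'U1')])

# keep the first record for each ngc value before joining on 'ngc'
result, indexes = np.unique(data['ngc'], return_index=True)
data = data[indexes]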
Example #53

result, indexes = np.unique(dt['ngc'], return_index=True)
dt = dt[indexes]

result, indexes = np.unique(nebula_distance['ngc'], return_index=True)
nebula_distance = nebula_distance[indexes]

dt = np.sort(dt, order=['ngc'])
nebula_distance = np.sort(nebula_distance, order=['ngc'])


#================================================================================================


dt = rfn.join_by('ngc', dt, nebula_distance, jointype='leftouter', usemask=False, defaults={'dist': 0})

result, indexes = np.unique(dt['messier'], return_index=True)
dt = dt[indexes]


#================================================================================================


fill_with_zeros = np.zeros(dt.size)
dt = rfn.append_fields(dt, ['x', 'y', 'z'], data=[fill_with_zeros, fill_with_zeros, fill_with_zeros], usemask=False)

dt["glong"] = np.radians(dt["glong"])
dt["glat"] = np.radians(dt["glat"])

dt["x"] = dt["dist"] * np.cos(dt["glat"]) * np.cos(dt["glong"])
Example #54
            ends = np.append(ends, ends[-1]+mod)
    else:
        starts = None
        ends = None
   

    starts, ends = suchyta_utils.mpi.Scatter(starts, ends)
    for i in range(len(starts)):
        z = fitsio.read(zzfile, ext=-1, rows=np.arange(starts[i],ends[i]))
        df = fitsio.FITS(dfile, 'r')
        print(starts[i], ends[i])
        w = df[1].where('coadd_objects_id >= %i && coadd_objects_id <= %i'%(z['coadd_objects_id'][0],z['coadd_objects_id'][-1]))
        if len(w)==0:
            continue
        d = df[1][w]
        dz = rec.join_by('coadd_objects_id', d, z, usemask=False)
        dz = rec.append_fields(dz, 'field', [field]*len(dz))

        file = os.path.join(outdir, '%i-%i.fits'%(starts[i],ends[i]))
        if os.path.exists(file):
            os.remove(file)
        f = fitsio.FITS(file, 'rw')
        f.write(dz)

    MPI.COMM_WORLD.Barrier()
    if MPI.COMM_WORLD.Get_rank()==0:
        outfile = os.path.join(outdir, '%s.fits'%(name))
        if os.path.exists(outfile):
            os.remove(outfile)
        files = os.listdir(outdir)
        of = fitsio.FITS(outfile, 'rw')
Example #55
                            dtype=[('hd', 'int'), ('con', 'S20')])


#================================================================================================


result, indexes = np.unique(data['hd'], return_index=True)
data = data[indexes]

result, indexes = np.unique(names['hd'], return_index=True)
names = names[indexes]

result, indexes = np.unique(constellations['hd'], return_index=True)
constellations = constellations[indexes]

data = rfn.join_by('hd', data, names, jointype='leftouter', usemask=False, defaults={'name': '?'})

data = rfn.join_by('hd', data, constellations, jointype='leftouter',usemask=False, defaults={'con': '?'})

fill_with_zeros = np.zeros(data.size)

data = rfn.append_fields(data, ['x', 'y', 'z', 'dist'],
                       [fill_with_zeros, fill_with_zeros, fill_with_zeros, fill_with_zeros], usemask=False)


#================================================================================================


data = data[data["parallax"] != 0]

data["parallax"] = np.absolute(data["parallax"])
Example #56
import numpy as np
import numpy.lib.recfunctions as rfn
import pandas as pd

#The logic in this file takes the data files generated by IBCombineHistoricalData
#"joins" them by datetime to create a file with one date-time column and numerous data columns
if __name__ == "__main__":
    #MANUALLY SET THESE FILES FOR JOINING
    filesToJoin = [
        "C:/Dropbox/CninSrc/JTS/TWS API/samples/Java/Data/CAD_BID.txt",
        "C:/Dropbox/CninSrc/JTS/TWS API/samples/Java/Data/CAD_ASK.txt"
    ]
    #MANUALLY SET THIS FOR OUTPUT FILE
    outFile = "C:/Dropbox/CninSrc/JTS/TWS API/samples/Java/Data/JOINED.txt"
    #MANUALLY SET THIS HEADER SO EASIER TO REMEMBER COLUMNS
    headerTxt = "Time,CAD_BID,CAD_ASK"

    joined = []
    for file in filesToJoin:
        print "Handing: " + file
        data = np.genfromtxt(file, delimiter = ',', dtype=np.dtype([('time',np.long), ('price', np.float)]))
        print "Joining"
        if len(joined) == 0:
            joined = data
        else:
            joined = rfn.join_by('time', joined, data, jointype='inner', usemask=False)

    np.savetxt(outFile, joined, delimiter=',', fmt="%s", header=headerTxt, comments="")
Example #57
def fetch_localizer_contrasts(
    contrasts,
    n_subjects=None,
    get_tmaps=False,
    get_masks=False,
    get_anats=False,
    data_dir=None,
    url=None,
    resume=True,
    verbose=1,
):
    """Download and load Brainomics Localizer dataset (94 subjects).

    "The Functional Localizer is a simple and fast acquisition
    procedure based on a 5-minute functional magnetic resonance
    imaging (fMRI) sequence that can be run as easily and as
    systematically as an anatomical scan. This protocol captures the
    cerebral bases of auditory and visual perception, motor actions,
    reading, language comprehension and mental calculation at an
    individual level. Individual functional maps are reliable and
    quite precise. The procedure is described in more detail on the
    Functional Localizer page."
    (see http://brainomics.cea.fr/localizer/)

    "Scientific results obtained using this dataset are described in
    Pinel et al., 2007" [1]

    Parameters
    ----------
    contrasts: list of str
        The contrasts to be fetched (for all 94 subjects available).
        Allowed values are::

            {"checkerboard",
            "horizontal checkerboard",
            "vertical checkerboard",
            "horizontal vs vertical checkerboard",
            "vertical vs horizontal checkerboard",
            "sentence listening",
            "sentence reading",
            "sentence listening and reading",
            "sentence reading vs checkerboard",
            "calculation (auditory cue)",
            "calculation (visual cue)",
            "calculation (auditory and visual cue)",
            "calculation (auditory cue) vs sentence listening",
            "calculation (visual cue) vs sentence reading",
            "calculation vs sentences",
            "calculation (auditory cue) and sentence listening",
            "calculation (visual cue) and sentence reading",
            "calculation and sentence listening/reading",
            "calculation (auditory cue) and sentence listening vs "
            "calculation (visual cue) and sentence reading",
            "calculation (visual cue) and sentence reading vs checkerboard",
            "calculation and sentence listening/reading vs button press",
            "left button press (auditory cue)",
            "left button press (visual cue)",
            "left button press",
            "left vs right button press",
            "right button press (auditory cue)",
            "right button press (visual cue)",
            "right button press",
            "right vs left button press",
            "button press (auditory cue) vs sentence listening",
            "button press (visual cue) vs sentence reading",
            "button press vs calculation and sentence listening/reading"}

        or equivalently one can use the original names::

            {"checkerboard",
            "horizontal checkerboard",
            "vertical checkerboard",
            "horizontal vs vertical checkerboard",
            "vertical vs horizontal checkerboard",
            "auditory sentences",
            "visual sentences",
            "auditory&visual sentences",
            "visual sentences vs checkerboard",
            "auditory calculation",
            "visual calculation",
            "auditory&visual calculation",
            "auditory calculation vs auditory sentences",
            "visual calculation vs sentences",
            "auditory&visual calculation vs sentences",
            "auditory processing",
            "visual processing",
            "visual processing vs auditory processing",
            "auditory processing vs visual processing",
            "visual processing vs checkerboard",
            "cognitive processing vs motor",
            "left auditory click",
            "left visual click",
            "left auditory&visual click",
            "left auditory & visual click vs right auditory&visual click",
            "right auditory click",
            "right visual click",
            "right auditory&visual click",
            "right auditory & visual click vs left auditory&visual click",
            "auditory click vs auditory sentences",
            "visual click vs visual sentences",
            "auditory&visual motor vs cognitive processing"}

    n_subjects: int, optional
        The number of subjects to load. If None is given,
        all 94 subjects are used.

    get_tmaps: boolean
        Whether t maps should be fetched or not.

    get_masks: boolean
        Whether individual masks should be fetched or not.

    get_anats: boolean
        Whether individual structural images should be fetched or not.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a specified
        location.

    url: string, optional
        Override download URL. Used for testing only (or if you set up a
        mirror of the data).

    resume: bool
        Whether to resume download of a partly-downloaded file.

    verbose: int
        Verbosity level (0 means no message).

    Returns
    -------
    data: Bunch
        Dictionary-like object; the attributes of interest are:

        - 'cmaps': string list
            Paths to nifti contrast maps
        - 'tmaps': string list (if 'get_tmaps' set to True)
            Paths to nifti t maps
        - 'masks': string list
            Paths to nifti files corresponding to the subjects individual masks
        - 'anats': string list
            Paths to nifti files corresponding to the subjects structural images

    References
    ----------
    Pinel, Philippe, et al.
    "Fast reproducible identification and large-scale databasing of
    individual functional cognitive networks."
    BMC neuroscience 8.1 (2007): 91.

    """
    if isinstance(contrasts, _basestring):
        raise ValueError("Contrasts should be a list of strings, but " 'a single string was given: "%s"' % contrasts)
    if n_subjects is None:
        n_subjects = 94  # 94 subjects available
    if (n_subjects > 94) or (n_subjects < 1):
        warnings.warn("Wrong value for 'n_subjects' (%d). The maximum "
                      "value will be used instead ('n_subjects=94')" % n_subjects)
        n_subjects = 94  # 94 subjects available

    # we allow the user to use alternatives to Brainomics contrast names
    contrast_name_wrapper = {
        # Checkerboard
        "checkerboard": "checkerboard",
        "horizontal checkerboard": "horizontal checkerboard",
        "vertical checkerboard": "vertical checkerboard",
        "horizontal vs vertical checkerboard": "horizontal vs vertical checkerboard",
        "vertical vs horizontal checkerboard": "vertical vs horizontal checkerboard",
        # Sentences
        "sentence listening": "auditory sentences",
        "sentence reading": "visual sentences",
        "sentence listening and reading": "auditory&visual sentences",
        "sentence reading vs checkerboard": "visual sentences vs checkerboard",
        # Calculation
        "calculation (auditory cue)": "auditory calculation",
        "calculation (visual cue)": "visual calculation",
        "calculation (auditory and visual cue)": "auditory&visual calculation",
        "calculation (auditory cue) vs sentence listening": "auditory calculation vs auditory sentences",
        "calculation (visual cue) vs sentence reading": "visual calculation vs sentences",
        "calculation vs sentences": "auditory&visual calculation vs sentences",
        # Calculation + Sentences
        "calculation (auditory cue) and sentence listening": "auditory processing",
        "calculation (visual cue) and sentence reading": "visual processing",
        "calculation (visual cue) and sentence reading vs "
        "calculation (auditory cue) and sentence listening": "visual processing vs auditory processing",
        "calculation (auditory cue) and sentence listening vs "
        "calculation (visual cue) and sentence reading": "auditory processing vs visual processing",
        "calculation (visual cue) and sentence reading vs checkerboard": "visual processing vs checkerboard",
        "calculation and sentence listening/reading vs button press": "cognitive processing vs motor",
        # Button press
        "left button press (auditory cue)": "left auditory click",
        "left button press (visual cue)": "left visual click",
        "left button press": "left auditory&visual click",
        "left vs right button press": "left auditory & visual click vs " + "right auditory&visual click",
        "right button press (auditory cue)": "right auditory click",
        "right button press (visual cue)": "right visual click",
        "right button press": "right auditory & visual click",
        "right vs left button press": "right auditory & visual click " + "vs left auditory&visual click",
        "button press (auditory cue) vs sentence listening": "auditory click vs auditory sentences",
        "button press (visual cue) vs sentence reading": "visual click vs visual sentences",
        "button press vs calculation and sentence listening/reading": "auditory&visual motor vs cognitive processing",
    }
    allowed_contrasts = list(contrast_name_wrapper.values())
    # convert contrast names
    contrasts_wrapped = []
    # get a unique ID for each contrast. It is used to give a unique name to
    # each download file and avoid name collisions.
    contrasts_indices = []
    for contrast in contrasts:
        if contrast in allowed_contrasts:
            contrasts_wrapped.append(contrast)
            contrasts_indices.append(allowed_contrasts.index(contrast))
        elif contrast in contrast_name_wrapper:
            name = contrast_name_wrapper[contrast]
            contrasts_wrapped.append(name)
            contrasts_indices.append(allowed_contrasts.index(name))
        else:
            raise ValueError("Contrast '%s' is not available" % contrast)

    # It is better to perform several small requests than a big one because:
    # - Brainomics server has no cache (can lead to timeout while the archive
    #   is generated on the remote server)
    # - Local (cached) version of the files can be checked for each contrast
    opts = {"uncompress": True}
    subject_ids = ["S%02d" % s for s in range(1, n_subjects + 1)]
    subject_id_max = subject_ids[-1]
    data_types = ["c map"]
    if get_tmaps:
        data_types.append("t map")
    rql_types = str.join(", ", ['"%s"' % x for x in data_types])
    root_url = "http://brainomics.cea.fr/localizer/"

    base_query = (
        "Any X,XT,XL,XI,XF,XD WHERE X is Scan, X type XT, "
        "X concerns S, "
        "X label XL, X identifier XI, "
        "X format XF, X description XD, "
        'S identifier <= "%s", ' % (subject_id_max,) + 'X type IN(%(types)s), X label "%(label)s"'
    )

    urls = [
        "%sbrainomics_data_%d.zip?rql=%s&vid=data-zip"
        % (root_url, i, _urllib.parse.quote(base_query % {"types": rql_types, "label": c}, safe=",()"))
        for c, i in zip(contrasts_wrapped, contrasts_indices)
    ]
    filenames = []
    for subject_id in subject_ids:
        for data_type in data_types:
            for contrast_id, contrast in enumerate(contrasts_wrapped):
                name_aux = str.replace(str.join("_", [data_type, contrast]), " ", "_")
                file_path = os.path.join("brainomics_data", subject_id, "%s.nii.gz" % name_aux)
                file_tarball_url = urls[contrast_id]
                filenames.append((file_path, file_tarball_url, opts))
    # Fetch masks if asked by user
    if get_masks:
        urls.append(
            "%sbrainomics_data_masks.zip?rql=%s&vid=data-zip"
            % (root_url, _urllib.parse.quote(base_query % {"types": '"boolean mask"', "label": "mask"}, safe=",()"))
        )
        for subject_id in subject_ids:
            file_path = os.path.join("brainomics_data", subject_id, "boolean_mask_mask.nii.gz")
            file_tarball_url = urls[-1]
            filenames.append((file_path, file_tarball_url, opts))
    # Fetch anats if asked by user
    if get_anats:
        urls.append(
            "%sbrainomics_data_anats.zip?rql=%s&vid=data-zip"
            % (root_url, _urllib.parse.quote(base_query % {"types": '"normalized T1"', "label": "anatomy"}, safe=",()"))
        )
        for subject_id in subject_ids:
            file_path = os.path.join("brainomics_data", subject_id, "normalized_T1_anat_defaced.nii.gz")
            file_tarball_url = urls[-1]
            filenames.append((file_path, file_tarball_url, opts))
    # Fetch subject characteristics (separated in two files)
    if url is None:
        url_csv = "%sdataset/cubicwebexport.csv?rql=%s&vid=csvexport" % (
            root_url,
            _urllib.parse.quote("Any X WHERE X is Subject"),
        )
        url_csv2 = "%sdataset/cubicwebexport2.csv?rql=%s&vid=csvexport" % (
            root_url,
            _urllib.parse.quote(
                "Any X,XI,XD WHERE X is QuestionnaireRun, " "X identifier XI, X datetime " "XD", safe=","
            ),
        )
    else:
        url_csv = "%s/cubicwebexport.csv" % url
        url_csv2 = "%s/cubicwebexport2.csv" % url
    filenames += [("cubicwebexport.csv", url_csv, {}), ("cubicwebexport2.csv", url_csv2, {})]

    # Actual data fetching
    dataset_name = "brainomics_localizer"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
    fdescr = _get_dataset_descr(dataset_name)
    files = _fetch_files(data_dir, filenames, verbose=verbose)
    anats = None
    masks = None
    tmaps = None
    # combine data from both covariates files into one single recarray
    from numpy.lib.recfunctions import join_by

    ext_vars_file2 = files[-1]
    csv_data2 = np.recfromcsv(ext_vars_file2, delimiter=";")
    files = files[:-1]
    ext_vars_file = files[-1]
    csv_data = np.recfromcsv(ext_vars_file, delimiter=";")
    files = files[:-1]
    # join_by sorts the output along the key
    csv_data = join_by("subject_id", csv_data, csv_data2, usemask=False, asrecarray=True)[:n_subjects]
    if get_anats:
        anats = files[-n_subjects:]
        files = files[:-n_subjects]
    if get_masks:
        masks = files[-n_subjects:]
        files = files[:-n_subjects]
    if get_tmaps:
        tmaps = files[1::2]
        files = files[::2]
    return Bunch(cmaps=files, tmaps=tmaps, masks=masks, anats=anats, ext_vars=csv_data, description=fdescr)
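
# A hedged usage sketch (not from the source). Assuming this is the fetcher
# from nilearn.datasets, a call might look like:
#
#   from nilearn.datasets import fetch_localizer_contrasts
#   data = fetch_localizer_contrasts(["left vs right button press"],
#                                    n_subjects=2, get_tmaps=True)
#   print(data.cmaps, data.tmaps)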
Example #58
0
def gc_txt2fits(fname_base):
	f = open('%s.txt' % fname_base, 'r')
	txt = f.read()
	f.close()
	
	col_fmt = []
	col_len = []
	
	col_fmt.append([('ID', 'S11'), ('Name', 'S12'),
	                ('RAh', 'i1'), ('RAm', 'i1'), ('RAs', 'f4'),
	                ('DECh', 'i1'), ('DECm', 'i1'), ('DECs', 'f4'),
	                ('l', 'f4'), ('b', 'f4'),
	                ('R_Sun', 'f4'), ('R_gc', 'f4'),
	                ('X', 'f4'), ('Y', 'f4'), ('Z', 'f4')])
	
	col_fmt.append([('ID', 'S11'),
	                ('FeH', 'f4'), ('FeH_wt', 'i2'),
	                ('EBV', 'f4'), ('VHB', 'f4'),
	                ('DM_V', 'f4'), ('V_t', 'f4'), ('M_Vt', 'f4'),
	                ('UB', 'f4'), ('BV', 'f4'),
	                ('VR', 'f4'), ('RI', 'f4'),
	                ('spt', 'S5'),
	                ('ellip', 'f4')])
	
	col_fmt.append([('ID', 'S11'),
	                ('v_r', 'f4'), ('v_r_err', 'f4'),
	                ('v_LSR', 'f4'),
	                ('sig_v', 'f4'), ('sig_v_err', 'f4'),
	                ('c', 'f4'), ('r_c', 'f4'), ('r_h', 'f4'),
	                ('mu_V', 'f4'), ('rho_0', 'f4'),
	                ('log_tc', 'f4'), ('log_th', 'f4')])
	
	'''
	comment_unit = {'ID': 'GLOBULAR CLUSTER ID',
	                'Name': 'NAME OF GC',
	                'RAh': 'RA HOUR',
	                'RAm': 'RA MINUTE',
	                'RAs': 'RA SECOND',
	                'DECh': 'DEC HOUR',
	                'DECm': 'DEC MINUTE',
	                'DECs': 'DEC SECOND',
	                'l': 'GALACTIC LONGITUDE',
	                'b': 'GALACTIC LATITUDE',
	                'R_Sun': 'DIST FROM SUN',
	                'R_gc': 'DIST FROM GALACTIC CENTER',
	                'X': 'CARTESIAN X DISP FROM GAL CENTER',
	                'Y': 'CARTESIAN Y DISP FROM GAL CENTER',
	                'Z': 'CARTESIAN Z DISP FROM GAL CENTER',
	                'FeH': 'METALLICITY',
	                'FeH_wt': 'WEIGHT OF FEH MEASUREMENT',
	                'EBV': 'B-V EXCESS',
	                'VHB': '',
	                'DMV': 'DIST MODULUS FROM V BAND',
	                'V_t': '',
	                'M_Vt': '',
	                'UB': 'U-B COLOR',
	                'BV': 'B-V COLOR',
	                'VR': 'V-R COLOR',
	                'RI': 'R-I COLOR',
	                'spt': 'INTEGRATED SPECTRAL TYPE',
	                'ellip': '',
	                'v_r': 'HELIOCENTRIC RADIAL VELOCITY',
	                'v_r_err': 'UNCERTAINTY IN v_r',
	                'v_LSR': 'RAD VEL RELATIVE TO LSR',
	                'sig_v': 'CENTRAL VELOCITY DISP',
	                'sig_v_err': 'UNCERTAINTY IN sig_v',
	                'c': 'CONCENTRATION PARAMETER',
	                'r_c': 'RADIUS OF CORE',
	                'r_h': 'HALF-LIGHT RADIUS',
	                'mu_V': 'V-BAND SURFACE BRIGHTNESS',
	                'rho_0': 'SURFACE NUMBER DENSITY',
	                'log_tc': 'CORE RELAXATION TIME',
	                'log_th': 'MEDIAN RELAXATION TIME'}
	'''
	
	col_len.append([11, 13, 3, 3, 7, 4, 3, 7, 8, 8, 6, 6, 6, 6, 5])
	col_len.append([11, 7, 5, 5, 6, 6, 6, 7, 7, 6, 6, 6, 6, 5])
	col_len.append([11, 8, 6, 8, 8, 7, 8, 8, 8, 7, 7, 7, 5])
	
	formatted_txt = []
	for i,s in enumerate(block_string_by_comments(txt)):
		rows = []
		
		for line in s.splitlines():
			# Ignore comments and blank lines
			line = line.lstrip()
			if len(line) == 0:
				continue
			elif line[0] == '#':
				continue
			
			# Read in columns of constant width
			cols = []
			start = 0
			ncols = 0
			for c in col_len[i]:
				if start + c > len(line):
					break
				tmp = line[start:start+c].lstrip().rstrip()
				if tmp == '':
					tmp = 'NaN'
				cols.append(tmp)
				ncols += 1
				start += c
			
			# Fill in missing columns at end
			for k in range(ncols, len(col_len[i])):
				cols.append('NaN')
			
			# Join columns, using tabs as delimiters
			rows.append('\t'.join(cols))
		
		# Join rows, using endlines as delimiters
		formatted_txt.append('\n'.join(rows))
	
	# Convert formatted strings into numpy record arrays
	d = []
	for fmt,s in zip(col_fmt, formatted_txt):
		d.append(np.genfromtxt(StringIO(s), dtype=fmt, delimiter='\t'))
	
	
	# Merge record arrays by name
	out = join_by('ID', d[0], d[1], jointype='outer')
	out = join_by('ID', out, d[2], jointype='outer')
	out['Name'][out['Name'] == 'NaN'] = ''
	out['spt'][out['spt'] == 'NaN'] = ''
	
	# Output record array to FITS file
	'''
	cols = []
	cols.append(pyfits.Column(name='MU', format='%dD' % len(mu), array=mu))
	for i, m in enumerate(maps):
		cols.append(pyfits.Column(name='A_R %d' % i, format='D', array=m))
	
	tbhdu = pyfits.new_table(cols)
	tbhdu.header.update('NESTED', nest, 'Healpix ordering scheme.')
	tbhdu.header.update('NSIDE', hp.npix2nside(maps.shape[1]), 'Healpix nside parameter.')
	
	tbhdu.writeto(fname, clobber=True)
	'''
	
	#hdu = []
	#hdu.append(pyfits.PrimaryHDU(mu))
	#for m in maps:
	#	hdu.append(pyfits.ImageHDU(m))
	#hdulist = pyfits.HDUList(hdu)
	#hdulist.writeto(fname, clobber=True)
	
	try:
		pyfits.writeto('%s.fits' % fname_base, out, clobber=False)
	except IOError as e:
		print(e)
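
# A hedged usage sketch (not from the source): given a fixed-width,
# Harris-style globular-cluster table in mwgc.txt, this writes mwgc.fits.
# The base name 'mwgc' is an assumption.
#
#   gc_txt2fits('mwgc')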
Example #59
0
    def fetch(self, contrasts=None, n_subjects=None, get_tmaps=False,
              get_masks=False, get_anats=False, url=None,
              resume=True, force=False, verbose=1):
        if n_subjects is None:
            n_subjects = 94  # 94 subjects available
        if (n_subjects > 94) or (n_subjects < 1):
            warnings.warn("Wrong value for 'n_subjects' (%d). The maximum "
                          "value will be used instead ('n_subjects=94')"
                          % n_subjects)
            n_subjects = 94  # 94 subjects available

        if contrasts is None:
            contrasts = self.contrast_name_wrapper.values()
        elif isinstance(contrasts, _basestring):
            contrasts = [contrasts]

        allowed_contrasts = list(self.contrast_name_wrapper.values())
        # convert contrast names
        contrasts_wrapped = []
        # get a unique ID for each contrast. It is used to give a unique name to
        # each download file and avoid name collisions.
        contrasts_indices = []
        for contrast in contrasts:
            if contrast in allowed_contrasts:
                contrasts_wrapped.append(contrast)
                contrasts_indices.append(allowed_contrasts.index(contrast))
            elif contrast in self.contrast_name_wrapper:
                name = self.contrast_name_wrapper[contrast]
                contrasts_wrapped.append(name)
                contrasts_indices.append(allowed_contrasts.index(name))
            else:
                raise ValueError("Contrast \'%s\' is not available" % contrast)

        # It is better to perform several small requests than a big one because:
        # - Brainomics server has no cache (can lead to timeout while the archive
        #   is generated on the remote server)
        # - Local (cached) version of the files can be checked for each contrast
        opts = {'uncompress': True}
        subject_ids = ["S%02d" % s for s in range(1, n_subjects + 1)]
        subject_id_max = subject_ids[-1]
        data_types = ["c map"]
        if get_tmaps:
            data_types.append("t map")
        rql_types = str.join(", ", ["\"%s\"" % x for x in data_types])
        root_url = "http://brainomics.cea.fr/localizer/"

        base_query = ("Any X,XT,XL,XI,XF,XD WHERE X is Scan, X type XT, "
                      "X concerns S, "
                      "X label XL, X identifier XI, "
                      "X format XF, X description XD, "
                      'S identifier <= "%s", ' % (subject_id_max, ) +
                      'X type IN(%(types)s), X label "%(label)s"')

        urls = ["%sbrainomics_data_%d.zip?rql=%s&vid=data-zip"
                % (root_url, i,
                   _urllib.parse.quote(base_query % {"types": rql_types,
                                              "label": c},
                                safe=',()'))
                for c, i in zip(contrasts_wrapped, contrasts_indices)]
        filenames = []
        for subject_id in subject_ids:
            for data_type in data_types:
                for contrast_id, contrast in enumerate(contrasts_wrapped):
                    name_aux = str.replace(
                        str.join('_', [data_type, contrast]), ' ', '_')
                    file_path = os.path.join(
                        "brainomics_data", subject_id, "%s.nii.gz" % name_aux)
                    file_tarball_url = urls[contrast_id]
                    filenames.append((file_path, file_tarball_url, opts))
        # Fetch masks if asked by user
        if get_masks:
            urls.append("%sbrainomics_data_masks.zip?rql=%s&vid=data-zip"
                        % (root_url,
                           _urllib.parse.quote(base_query % {"types": '"boolean mask"',
                                                      "label": "mask"},
                                        safe=',()')))
            for subject_id in subject_ids:
                file_path = os.path.join(
                    "brainomics_data", subject_id, "boolean_mask_mask.nii.gz")
                file_tarball_url = urls[-1]
                filenames.append((file_path, file_tarball_url, opts))
        # Fetch anats if asked by user
        if get_anats:
            urls.append("%sbrainomics_data_anats.zip?rql=%s&vid=data-zip"
                        % (root_url,
                           _urllib.parse.quote(base_query % {"types": '"normalized T1"',
                                                      "label": "anatomy"},
                                        safe=',()')))
            for subject_id in subject_ids:
                file_path = os.path.join(
                    "brainomics_data", subject_id,
                    "normalized_T1_anat_defaced.nii.gz")
                file_tarball_url = urls[-1]
                filenames.append((file_path, file_tarball_url, opts))
        # Fetch subject characteristics (separated in two files)
        if url is None:
            url_csv = ("%sdataset/cubicwebexport.csv?rql=%s&vid=csvexport"
                       % (root_url, _urllib.parse.quote("Any X WHERE X is Subject")))
            url_csv2 = ("%sdataset/cubicwebexport2.csv?rql=%s&vid=csvexport"
                        % (root_url,
                           _urllib.parse.quote("Any X,XI,XD WHERE X is QuestionnaireRun, "
                                        "X identifier XI, X datetime "
                                        "XD", safe=',')
                           ))
        else:
            url_csv = "%s/cubicwebexport.csv" % url
            url_csv2 = "%s/cubicwebexport2.csv" % url
        filenames += [("cubicwebexport.csv", url_csv, {}),
                      ("cubicwebexport2.csv", url_csv2, {})]

        # Actual data fetching
        files = self.fetcher.fetch(filenames, resume=resume, force=force, verbose=verbose)
        anats = None
        masks = None
        tmaps = None
        # combine data from both covariates files into one single recarray
        from numpy.lib.recfunctions import join_by
        ext_vars_file2 = files[-1]
        csv_data2 = np.recfromcsv(ext_vars_file2, delimiter=';')
        files = files[:-1]
        ext_vars_file = files[-1]
        csv_data = np.recfromcsv(ext_vars_file, delimiter=';')
        files = files[:-1]
        # join_by sorts the output along the key
        csv_data = join_by('subject_id', csv_data, csv_data2,
                           usemask=False, asrecarray=True)[:n_subjects]
        if get_anats:
            anats = files[-n_subjects:]
            files = files[:-n_subjects]
        if get_masks:
            masks = files[-n_subjects:]
            files = files[:-n_subjects]
        if get_tmaps:
            tmaps = files[1::2]
            files = files[::2]
        return Bunch(cmaps=files, tmaps=tmaps, masks=masks, anats=anats,
                     ext_vars=csv_data)
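
# A hedged usage sketch (not from the source): the class owning this fetch()
# method is not shown, so the instance below is hypothetical.
#
#   fetcher = LocalizerFetcher()  # hypothetical class name
#   bunch = fetcher.fetch(contrasts=["checkerboard"], n_subjects=2)
#   print(bunch.cmaps)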