Example #1
def tabularmetadataforms(pathlist,
                         depends_on=WORKING_DE.relative_metadata_dir):
    attlist = ['description', 'author', 'title', 'keywords']
    recs1 = []
    recs2 = []
    for x in pathlist:
        print x
        mdp = metadatapath(x) + '/ProcessedMetaData.pickle'
        if PathExists(mdp):
            M = pickle.load(open(mdp))
            D = {}
            for att in attlist:
                if att in M.keys():
                    D[att] = M[att]
                else:
                    D[att] = ''
            recs1.append((x, ) +
                         tuple([D[att].replace('\n', ' ') for att in attlist]))
            colnames = M['colnames']
            if 'coldescrs' in M.keys():
                coldescrs = [
                    M['coldescrs'][m] if m in M['coldescrs'].keys() else ''
                    for m in colnames
                ]
            else:
                coldescrs = [''] * len(colnames)

            recs2 += zip([x] * len(colnames), colnames, coldescrs)

    X = tb.tabarray(records=recs1, names=['Path'] + attlist)
    Y = tb.tabarray(records=recs2, names=['Path', 'ColName', 'ColDescr'])

    return [X, Y]
Example #2
def tabularmetadataforms(pathlist,depends_on = '../System/MetaData/'):
	attlist = ['description','author','title','keywords']
	recs1 = []
	recs2 = []
	for x in pathlist:
		print x
		mdp = metadatapath(x) + '/ProcessedMetaData.pickle'
		if PathExists(mdp):
			M = pickle.load(open(mdp))
			D = {}
			for att in attlist:
				if att in M.keys():
					D[att] = M[att]
				else:
					D[att] = ''
			recs1.append((x,) + tuple([D[att].replace('\n',' ') for att in attlist]))
			colnames = M['colnames']
			if 'coldescrs' in M.keys():
				coldescrs = [M['coldescrs'][m] if m in M['coldescrs'].keys() else ''  for m in colnames]
			else:
				coldescrs = ['']*len(colnames)
			
			recs2 += zip([x]*len(colnames),colnames,coldescrs)		
		
	X = tb.tabarray(records = recs1,names=['Path'] + attlist)
	Y = tb.tabarray(records = recs2,names = ['Path','ColName','ColDescr'])
	
	return [X,Y]
Example #3
def ParseFiles(depends_on=(root_wf + '1To10000.html', root_wf +
                           '10001To20000.html', root_wf + '20001To30000.html',
                           root_wf + '30001To40000.html'),
               creates=root_wf + 'WordFrequencies.csv'):

    Words = []
    Freqs = []
    Rank = []
    for (j, x) in enumerate(depends_on):
        Soup = BeautifulSoup(open(x, 'r'))
        P = Soup.findAll('p')
        count = 0
        for (i, p) in enumerate(P):
            print 'processing', x, ', group', i
            A = p.findAll('a')
            if len(A) > 10:
                C = Contents(p).replace(' = ', ' ').split(' ')
                newwords = C[::2]
                newfreqs = C[1::2]
                Words += newwords
                Freqs += newfreqs
                Rank += range(1 + j * 10000 + count,
                              1 + j * 10000 + count + len(newwords))
                count += len(newwords)

    tb.tabarray(columns=[Words, Freqs, Rank],
                names=['Word', 'Frequency', 'Rank']).saveSV(creates,
                                                            delimiter=',')
Example #4
File: raw.py  Project: dicai/ratio
def _normalize_array(x):
    flour = tb.tabarray(columns=[x['flour_all'] + x['flour_almond'] + \
            x['flour_bread'] + x['flour_cake'] + x['flour_other'] + \
            x['flour_rice'] + x['flour_wheat'] + x['flour_rye']], \
            names=['flour'])

    liquid = tb.tabarray(columns=[x['milk'] + x['water'] + \
            x['soymilk'] + x['buttermilk'] + x['juice_apple'] + \
            x['juice_can'] + x['juice_lemon'] + x['juice_lime'] + \
            x['juice_orange'] + x['juice_other'] + x['juice_pineapple']], \
            names=['liquid'])

    fat = tb.tabarray(columns=[x['butter'] + x['cream_cheese'] + \
            x['cream_half'] + x['cream_ice'] + x['cream_other'] + \
            x['cream_tartar'] + x['cream_whipped'] + x['margarine'] + \
            x['oil_canola'] + x['oil_olive'] + x['oil_other'] + \
            x['oil_vegetable'] + x['lard'] + x['shortening']], \
            names=['fat'])

    sugar = tb.tabarray(columns=[x['sugar_brown'] + x['sugar_powder'] + \
            x['sugar_white']], names=['sugar'])

    a = x[['egg']].colstack(fat).colstack(flour).colstack(liquid)\
            .colstack(sugar).extract()
    a = a / np.repeat(a.sum(axis = 1), a.shape[1])\
            .reshape(a.shape[0], a.shape[1])
    return a
Example #5
def make_background_db(
    creates="../background_certificate.txt", depends_on=("../3d_hdr_backgrounds.csv", "../2d_grayscale_backgrounds.csv")
):

    conn = pm.Connection()
    db = conn["dicarlocox_3dmodels"]
    db.drop_collection("3d_spherical_backgrounds")

    coll = db["3d_spherical_backgrounds"]

    recs = [
        {"name": "backlot", "path": "backlot.tdl"},
        {"name": "apartment", "path": "apartment.tdl"},
        {"name": "empty room", "path": "empty_room.tdl"},
        {"name": "office", "path": "office.tdl"},
    ]

    for rec in recs:
        coll.insert(rec)

    X = tb.tabarray(SVfile=depends_on[0])
    recs = [{"name": x["Path"][:-4], "path": x["Path"], "description": x["Description"], "type": "3d hdr"} for x in X]
    for rec in recs:
        coll.insert(rec)

    X = tb.tabarray(SVfile=depends_on[1])
    recs = [{"name": x["Path"][:-4], "path": x["Path"], "type": "2d grayscale"} for x in X]
    for rec in recs:
        coll.insert(rec)

    rec = {"name": "blank gray image", "path": "gray.td", "type": "blank"}
    coll.insert(rec)
Example #6
def TestPivot2():
    X = tb.tabarray(records=[('x', 1, 3, 6), ('y', 0, 3, 1), ('x', 0, 3, 5)], 
                    names=['a', 'b', 'c', 'd'])
    Y = X.pivot('b', 'a')
    Z = tb.tabarray(records=[(0, 3, 3, 5, 1), (1, 3, 0, 6, 0)], 
                    names=['b', 'x_c', 'y_c', 'x_d', 'y_d'])
    assert (Y == Z).all()
Example #7
def specific_config_gen(IC,args):
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile = args['frame_correspondence'])
    IC.size = args['size']
    IC.prefix = prefix = args.get('image_extension','.jpg')
    IC.current_frame_path = None
    csvs = [x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv')]
    csvs.sort()
    Xs = [tb.tabarray(SVfile = os.path.join(IC.annotate_dir,csv)) for csv in csvs]
    cns = [csv.split('.')[0] for csv in csvs]
    cns = [[cn]*len(X) for (cn,X) in zip(cns,Xs)]
    Xs = [X.addcols(cn,names=['clip_num']) for (cn,X) in zip(cns,Xs)]

    csvs = [x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv')]
    csvs.sort()
    Gs = []
    fields = ['clip_num','Frame'] + xfields + yfields
    for ind,csv in enumerate(csvs):
        try:
            g = tb.tabarray(SVfile = os.path.join(IC.groundtruth_dir,csv))
        except:
            x = Xs[ind].addcols([-1]*len(Xs[ind]),names=['Correctness'])
        else:
            g = g.addcols([csv.split('.')[0]]*len(g),names = ['clip_num'])
            g = g[fields + ['Confidence']]
            g.renamecol('Confidence','Correctness')
            x = Xs[ind].join(g,keycols=fields)
        Gs.append(x)
    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num','Frame'])
    
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num','Frame']],X[['clip_num','Frame']])
    Y = Y[F]
    X = X.join(Y,keycols=['clip_num','Frame'])

    params = []
    for t in X:
        print(t)  
        cn = t['clip_num']
        fr = t['Frame']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc,yc = bb.center
        center = correct_center((xc,yc),IC.size,(1920,1080))
        bb_new = bbox.BoundingBox(center = center,width = IC.size[0], height = IC.size[1])
        p = SON([('size',IC.size),
                     ('bounding_box',SON([('xfields',list(bb_new.xs)),('yfields',list(bb_new.ys))])),
                     ('original_bounding_box',SON([('xfields',list(bb.xs)),('yfields',list(bb.ys))])),
                     ('clip_num',cn),
                     ('Frame',int(t['Original'])),
                     ('base_dir',IC.base_dir),
                     ('correctness',int(t['Correctness']))])
        p.update(box)
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image',p)]))
    return params
Example #8
def apply_binary(din='../data/compas',
                 froot='compas',
                 neg_names=None,
                 prefix=''):

    ftrain = os.path.join(din, '%s%s_train.out' % (prefix, froot))
    ftrain_label = os.path.join(din, '%s%s_train.label' % (prefix, froot))
    ftest = os.path.join(din, '%s_test.csv' % froot)
    fout = os.path.join(din, '%s%s_test.out' % (prefix, froot))
    flabel = os.path.join(din, '%s%s_test.label' % (prefix, froot))

    x = tb.tabarray(SVfile=ftest)

    if (neg_names is None):
        neg_names = [n.replace(':', ':not-') for n in x.dtype.names]

    y = tb.tabarray(array=(1 - x.extract()), names=neg_names)

    names = list(x.dtype.names) + list(y.dtype.names)
    positive_label_name = x.dtype.names[-1]
    negative_label_name = y.dtype.names[-1]

    x = x.colstack(y)
    recs = [x[negative_label_name], x[positive_label_name]]

    x = x.extract()
    d = dict(zip(names, [x[:, i] for i in range(len(names))]))

    print 'reading rules from', ftrain
    rule_descr = [
        line.strip().split()[0]
        for line in open(ftrain, 'rU').read().strip().split('\n')
    ]

    print 'extracting these rules from', ftest
    out = []
    for descr in rule_descr:
        rule = [clause for clause in descr.strip('{}').split(',')]
        bv = np.cast[str](np.cast[int](np.array([
            (d[name] == 1) for name in rule
        ]).all(axis=0)))
        out.append('%s %s' % (descr, ' '.join(bv)))

    print 'writing', fout
    f = open(fout, 'w')
    f.write('\n'.join(out) + '\n')
    f.close()

    print 'writing', flabel
    labels = [
        line.split()[0]
        for line in open(ftrain_label, 'rU').read().strip().split('\n')
    ]
    f = open(flabel, 'w')
    f.write('\n'.join([
        '{%s} %s' % (l, ' '.join(np.cast[str](np.cast[int](r))))
        for (l, r) in zip(labels, recs)
    ]) + '\n')
    f.close()
Example #9
def test_bionumbers():
    X = tb.tabarray(SVfile = 'tests/bionumbers.txt') 
    fname = TestDataDir + 'bionumbers.txt'
    X.saveSV(fname, quotechar="'")
    Y = tb.tabarray(SVfile = TestDataDir + 'bionumbers.txt',quotechar="'")
    names = ('ID', 'Property', 'Organism', 'Value', 'Units', 'Range', 
              'NumericalValue', 'Version')               
    assert_bio(X.dtype.names == names and len(X) == 4615 and eq(X,Y), fname)
Example #10
 def test_missingvals4(self):
     fname = TestDataDir + 'missingvals4.csv'
     F = open(fname,'w')
     F.write('Name,Age,Gender\nDaniel,12,M\nElaine,'',F\nFarish,46,')
     F.close()
     X = tb.tabarray(SVfile=fname)
     X2 = tb.tabarray(records=[('Daniel', 12, 'M'), ('Elaine', np.nan, 'F'), ('Farish', 46, '')],names=['Name','Age','Gender'])
     self.assert_io(eq(X, X2), fname)                        
Example #11
def makemetadata(code,datadir,outfile1,outfile2,depends_on = (resource_root + 'ProcessedManifest_2_HandAdditions.tsv',resource_root + 'Keywords.txt')):

    Z  = {}

    keyword_file = depends_on[1]
    Y = tb.tabarray(SVfile = keyword_file)[['Code','Keywords']]
    y = Y[Y['Code'] == code]
    Z['keywords'] = [x.strip() for x in str(y['Keywords'][0]).split(',')]
    
    
    dirl = np.array(listdir(datadir))
    
    pr = lambda x : x.split('!')[-1][:-4]
    p=re.compile('\([^\)]*\)')
    
    tps = [l for l in dirl if l.endswith('.txt.txt')]
    if tps:
        textpath = datadir + tps[0]
        [SD,things] = ParseTexts(textpath,code)
        FNs = [p.sub('',things[pr(y).lower()]).replace(' ,',',').replace(',,',',') if pr(y).lower() in things.keys() else '' for y in dirl]
        FNs = [z.split('=')[1] if '=' in z and not ' =' in z else z for z in FNs]
    else:
        SD = ''
        FNs = len(dirl)*['']
        
    Z['description'] = SD
    
    cfs = [l for l in dirl if l.endswith('.contacts.txt')]
    if cfs:
        contactfile = datadir + cfs[0]
        ctext = open(contactfile,'rU').read().strip()
        if '<html>' in ctext.lower():
            clines = ctext.split('\n')
            fb = [i for i in range(len(clines)) if clines[i].strip() == ''][0]
            ctext = '\n'.join(clines[fb+1:])
        ctext = ctext.strip(' *\n').replace('\n\n','\n')    
    else:
        ctext = ''
        
    Z['contactInfo'] = ctext
    f = open(outfile1,'w')
    pickle.dump(Z,f)
    f.close()

    Y = tb.tabarray(SVfile = depends_on[0])
    Y.sort(order = ['File'])

    
    dirlp = np.array([pr(y) for y in dirl])
    [A,B] = tb.fast.equalspairs(dirlp,Y['File'])
    if (B>A).any():
        print 'adding hand-made content to', dirlp[B>A]
        for k in (B>A).nonzero()[0]:
            FNs[k] = Y['FileName'][A[k]]    
    
    D = tb.tabarray(columns=[dirl,FNs], names = ['Path','FileName'])
    
    D.saveSV(outfile2,metadata = True)  
Example #12
def TestReplace2():
    V1 = ['North', 'South', 'East', 'West']
    V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
    Recs = [(a, b, np.random.rand() * 100, np.random.randint(100000)) 
                                                       for a in V1 for b in V2]
    X = tb.tabarray(records=Recs, 
                    names=['Region', 'Sector', 'Amount', 'Population'])
    X2 = tb.tabarray(records=Recs, 
                     names=['Region', 'Sector', 'Amount', 'Population'])

    X.replace('S', 'M')
    assert((X == X2).all())
Example #13
def get_subclass_pages():
    X = tb.tabarray(SVfile = 'catlevels.tsv')
    recs = []
    p = re.compile('Sub\d')
    f = lambda x : p.match(dict(x.attrs).get('class',''))
    for x in X[:]:
        subrecs = []
        cat = x['CLASS']
        fixed_cat = fix_cat(cat)
        title = x['CLASS TITLE']
        os.system('wget http://www.uspto.gov/web/patents/classification/uspc' + fixed_cat + '/sched' + fixed_cat + '.htm -O ' + cat + '.html')
        Soup = BeautifulSoup.BeautifulSoup(open(cat + '.html'))
        Crac = Soup.find(True,'CracHeader')
        For = Soup.find(True,'ForHeader')
        Dig = Soup.find(True,'DigHeader')
        if Crac:
            end = Crac
        elif For:
            end = For
        elif Dig:
            end = Dig
        else:
            end  = None
        if end:
            T = end.findAllPrevious('tr',valign='top')[:]
        else:
            T = Soup.findAll('tr',valign='top')[:]
        T.reverse()
        for (i,t) in enumerate(T): 
            try:
                subclass = str(Contents(t.find(f)).replace('&nbsp;','').strip())
            except:
                pass
            else:
                try:
                    subtitle = Contents(t.find(True,"SubTtl")).strip()
                except:
                    pass
                else:
                    try:
                        indent = int(dict(t.find(True,"SubTtl").find("img").attrs)['src'].split('/')[-1].split('_')[0])
                    except AttributeError:
                        indent = 0
                    #print (cat,title,subclass,subtitle,indent)    
                    subrecs.append((cat,title,subclass,subtitle,indent))
        subrecs.reverse()
        recs.extend(subrecs)

    Y = tb.tabarray(records = recs, names=['Class','Title','Subclass','Subtitle','Indent'],formats=['str','str','str','str','int'])
    Y.saveSV('classifications.tsv',metadata=True)
Example #14
def TestPivot3():
    V1 = ['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia', 'Australia', 
          'Africa', 'Antarctica']
    V1.sort()
    V2 = ['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment', 'Taxes']
    V2.sort()
    Recs = [(a, b, 100 * np.random.rand()) for a in V1 for b in V2]
    X = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source')
    Z = utils.uniqify(X['Source'])
    Z.sort()
    Cols = [[y['Amount'] for y in X if y['Source'] == b] for b in Z]
    W = tb.tabarray(columns=[V1] + Cols, 
                    names=['Region'] + [b + '_Amount' for b in Z])
    assert (W == Y).all()
Example #15
def TestPivot4():
    V1 = ['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia', 'Australia', 'Africa', 'Antarctica']
    V1.sort()
    V2 = ['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment', 'Taxes']
    V2.sort()
    Recs = [(a, b, 100 * np.random.rand()) for a in V1 for b in V2]
    X = tb.tabarray(records=Recs[:-1],
                    names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source', 
                NullVals=dict([(o,-999) for o in X.dtype.names]))
    X2 = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y2 = X2.pivot('Region', 'Source')
    Y2[V2[-1] + '_Amount'][-1] = -999

    assert (Y == Y2).all()
Example #16
 def test_load_save_TSV_infer2(self):
     fname = TestDataDir + 'test2.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'])
     X2 = tb.tabarray(SVfile=fname, 
                      metametadata={'coloring': 0, 'names': 1})
     self.assert_io(eq(self.X, X2), fname)
Example #17
def get_results(mean,std,ext_hash,splitfilename,outfile):
    conn = pm.Connection(document_class = SON)
    db = conn['thor']
    fcol = db['features.files']
    split_fs = gridfs.GridFS(db,'split_performance')
    fh = split_fs.get_version(splitfilename)
    r = cPickle.loads(fh.read())
    r = r['split_result']['cls_data']
    weights = r['coef']
    bias = r['intercept']
    L = fcol.find({'__hash__':ext_hash},fields=['image.clip_num','image.Frame','feature','image.bounding_box'])
    recs = []
    names = ['clip_num','frame','x1','x2','x3','x4','y1','y2','y3','y4'] + labels    
    for l in L:
        cn = str(l['image']['clip_num'])
        fr = l['image']['Frame']
        print(l['_id'],cn,fr)
        bx = l['image']['bounding_box']['xfields']
        by = l['image']['bounding_box']['yfields']
        feat = l['feature']
        feat = (feat - mean)/std
        m = sp.dot(feat,weights) + bias
        rec = (cn,fr,) + tuple(bx) + tuple(by) + tuple(m)
        recs.append(rec)
        if len(recs) == 10000:
            X = tb.tabarray(records = recs, names = names)
            tb.io.appendSV(outfile,X,metadata=True)
            recs = []
Example #18
 def setUp(self):
     self.D = tb.tabarray(
              array=[(2, 'a', 2, 'cc', 3.0), (2, 'b', 5, 'dcc', 2.0), 
                     (7, 'e', 2, 'cjc', 8.0), (2, 'e', 2, 'ccj', 3.0)], 
              names=['a', 'c', 'b', 'd', 'e'], formats='i4,|S1,i4,|S3,f8', 
              coloring={'moo': ['a', 'b'], 'boo': ['a', 'd', 'e']})
     self.Root = 'basic'
Example #19
 def setUp(self):
     names = ['name', 'ID', 'color', 'size', 'June', 'July']
     data = [('bork', 1212, 'blue', 'big', 45.32, 46.07), 
             ('mork', 4660, 'green', 'small', 32.18, 32.75), 
             ('stork', 2219, 'red', 'huge', 60.93, 61.82), 
             ('lork', 4488, 'purple', 'tiny', 0.44, 0.38)]
     self.x = tb.tabarray(records=data, names=names)
Example #20
 def test_toload_redundant_tsv(self):
     toload = ['a', 'boo']
     fname = TestDataDir + self.Root + '6.tsv'
     self.D.saveSV(fname, metadata=['names', 'formats', 'types', 'coloring', 'dialect'])
     D = tb.tabarray(SVfile=fname, usecols=toload)
     assert set(D.dtype.names) == set(D.coloring['boo'])
     self.assert_io(eq(self.D[toload], D[toload]), fname)
Example #21
    def test_strictjoin3(self):
        X = self.X
        keycols = self.keycols
        others=self.others
        X1 = X[:(3 * len(X) / 4)][keycols + others[0]]
        X2 = X[(len(X) / 4):][keycols + others[1]]
        Y = spreadsheet.strictjoin([X1, X2], self.keycols)
        Y.sort(order=keycols)

        nvf = utils.DEFAULT_NULLVALUEFORMAT
        nvf1 = nvf(X[others[1][0]].dtype.descr[0][1])
        nvf2 = nvf(X[others[1][1]].dtype.descr[0][1])
        nvf3 = nvf(X[others[0][0]].dtype.descr[0][1])
        nvf4 = nvf(X[others[0][1]].dtype.descr[0][1])

        Recs = ([(a, b, c, d, nvf1, nvf2) for (a, b, c, d, e, f) 
                                         in X[:(len(X) / 4)]] + 
                [(a, b, c, d, e, f) for (a, b, c, d, e, f) 
                                   in X[(len(X) / 4):(3 * len(X) / 4)]] + 
                [(a, b, nvf3, nvf4, e, f) for (a, b, c, d, e, f) 
                                         in X[(3 * len(X) / 4):]])
        Z = tb.tabarray(records=Recs, names=X.dtype.names)
        Z.sort(order=self.keycols)

        self.assert_((Y == Z).all())
Example #22
 def test_load_save_CSV_infer(self):
     fname = TestDataDir + 'test.csv'
     self.X.saveSV(fname)
     X2 = tb.tabarray(SVfile=fname)  # normal scenario: names, no comments
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #23
def applysplitter(manifest,splitdir):
	MakeDir(splitdir)
	M = tb.tabarray(SVfile = manifest)
	vals = tb.uniqify(M['Prefix'])
	for v in vals:
		Mv = M[M['Prefix'] == v]
		Mv.saveSV(splitdir + 'Manifest_' + pathprocessor([v]) + '.tsv', metadata=True)
Example #24
 def test_load_save_TSV_infer(self):
     fname = TestDataDir + 'test.tsv'
     self.X.saveSV(fname)
     X2 = tb.tabarray(SVfile=fname)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #25
def get_meta(selected_basic_objs=SELECTED_BASIC_OBJS):
    """Mix the objectome 64 basic-level set and the chair subordinate
    level set"""
    assert len(np.unique(selected_basic_objs)) == 30
    meta_chairs = pk.load(open('meta_objt_chairs_subord_v3.pkl'))
    meta_basic = pk.load(open('meta_objt_full_64objs.pkl'))

    si = [
        i for i, e in enumerate(meta_basic) if e['obj'] in selected_basic_objs
    ]
    assert len(si) == 30 * 1000

    cnames = list(meta_chairs.dtype.names)
    assert list(meta_basic.dtype.names) == cnames
    cnames.remove('internal_canonical')
    cnames.remove('texture')  # contains None
    cnames.remove('texture_mode')  # contains None

    meta = tb.tabarray(columns=[
        np.concatenate([meta_basic[e][si], meta_chairs[e]]) for e in cnames
    ],
                       names=cnames)
    assert len(meta) == 30 * 1000 * 2
    assert len(np.unique(meta['obj'])) == 60  # 30 non-chairs + 30 chairs
    return meta, meta_basic, meta_chairs
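
The mixing pattern above, concatenating matching columns from two metadata tabarrays into a new one, can be shown with a minimal, self-contained sketch; the small arrays and field names below are hypothetical stand-ins, not the objectome data:

import numpy as np
import tabular as tb

# hypothetical stand-ins for meta_basic[si] and meta_chairs
meta_a = tb.tabarray(records=[('bear', 0.10), ('dog', 0.70)], names=['obj', 'ryz'])
meta_b = tb.tabarray(records=[('chair1', 0.30), ('chair2', 0.90)], names=['obj', 'ryz'])

cnames = list(meta_a.dtype.names)
combined = tb.tabarray(
    columns=[np.concatenate([meta_a[c], meta_b[c]]) for c in cnames],
    names=cnames)
assert len(combined) == len(meta_a) + len(meta_b)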
Example #26
def check_stamps(metadatafile,imagedir,train_frames,outdir):
    os.mkdir(outdir)
    metadata = tb.tabarray(SVfile=metadatafile)
    #get labels for training objects

    train_labels_inds = []
    for cn,fr in train_frames:
        inds = ((metadata['Frame'] == fr) & (metadata['clip_num'] == cn) & (metadata['ObjectType'] != 'DCR')).nonzero()[0]
        #ind = inds[t['object_number']]
        train_labels_inds.extend(inds)
    train_labels = metadata[train_labels_inds]
    #get stamps for training objects

    train_points = []
    train_points_labels = []
    sizes = []
    num_train = 0
    for label in train_labels:
        lbl = label['clip_num'] + '_' + str(label['Frame']) + '.jpg'
        print(label)
        framefile = os.path.join(imagedir,lbl)
        im = get_image(framefile)

        box = bbox.BoundingBox(xs = [label[xf] for xf in xfields],
                               ys = [label[yf] for yf in yfields])
        stamp = bbox.stamp(im,box,stamp_shape=(200,200))[0]
        if stamp is not None:
            img = Image.fromarray(stamp)
            img.save(os.path.join(outdir,str(num_train) + '.jpg'))
            num_train += 1
Example #27
def parser2():
    files_to_parse = [x for x in os.listdir('.') if x.endswith('_file.html')]
    
    for file in files_to_parse:
        print('parsing',file)
        #next step of getting data for each water system
        H20Systems = BeautifulSoup.BeautifulSoup(open(file))
        table = H20Systems.findAll('table')[3].findAll('table')[7]
        TR = table.findAll('tr',bgcolor='#F5F5F5')
        Links = [str(dict(tr.findAll('td')[1].findAll('a')[0].attrs)['href']) for tr in TR]
        Names = [utils.Contents(tr.findAll('td')[1]) for tr in TR]
        Number = [utils.Contents(tr.findAll('td')[2]).replace('&nbsp;',' ') for tr in TR]
        County = [utils.Contents(tr.findAll('td')[3]) for tr in TR]
        outname = file.split('_')[0] + '_file.tsv'

        tb.tabarray(columns = [Links,Names,Number,County],names=['Links','Names','Number','County']).saveSV(outname,metadata=True)
Example #28
    def get_data(self):

        trial_records = []
        obj_oi = np.unique(self.meta["obj"])
        img_oi = np.unique(self.meta["id"])

        for subj in self.data:
            for r, i, ss in zip(subj["Response"], subj["ImgData"], subj["StimShown"]):
                if len(i) > 1:
                    s = i["Sample"]
                    t = i["Test"]
                    s_id = s["id"]
                    s_obj = s["obj"]
                    t_obj = [t_["obj"] for t_ in t]
                    d_obj = [t_ for t_ in t_obj if t_ != s_obj][0]
                    resp = t_obj[r]
                else:  # backwards compatibility with previous mturkutils
                    s_id = i[0]["id"]
                    s_obj = i[0]["obj"]
                    t_obj = [strip_objectomeFileName(fn) for fn in ss[1:]]
                    d_obj = [t_ for t_ in t_obj if t_ != s_obj][0]
                    resp = strip_objectomeFileName(r)

                if (s_id in img_oi) & (d_obj in obj_oi):
                    rec_curr = (s_obj,) + (d_obj,) + (resp,) + (s_id,) + (subj["WorkerID"],) + (subj["AssignmentID"],)
                    trial_records.append(rec_curr)

        self.trials = tb.tabarray(records=trial_records, names=self.trial_kw_names, formats=self.trial_kw_formats)
        return
Example #29
def create_metadata():
# Metadata
    name = 'riskfactors'
    source = {
	"agency": {"shortName": "DHHS", "name": "Department of Health and Human Services"},
	"subagency": {"shortName": "CDC", "name": "Centers for Disease Control and Prevention"},
	"topic": {"name": "Health and Nutrition"},
	#"subtopic": {"name": "Release Quantity Data"},
	"program": {"shortName": "CHSI", "name": "Community Health Status Indicators"},
	"dataset": {"shortName": "ohdc", "name": "Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer"}
}
    Y = tb.tabarray(SVfile = 'DATAELEMENTDESCRIPTION.csv')
    metadata = {
	'title':'Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer',
	'description':"Community Health Status Indicators (CHSI) to combat obesity, heart disease, and cancer are major components of the Community Health Data Initiative. This dataset provides key health indicators for local communities and encourages dialogue about actions that can be taken to improve community health (e.g., obesity, heart disease, cancer). The CHSI report and dataset was designed not only for public health professionals but also for members of the community who are interested in the health of their community. The CHSI report contains over 200 measures for each of the 3,141 United States counties. Although CHSI presents indicators like deaths due to heart disease and cancer, it is imperative to understand that behavioral factors such as obesity, tobacco use, diet, physical activity, alcohol and drug use, sexual behavior and others substantially contribute to these deaths.",
	'keywords':['Obesity','CHSI','health','data','community','indicators','interventions','performance','measurable','life expectancy','mortality','disease','prevalence','risk','factors','behaviors','socioeconomic','environments','access','cost','quality','warehouse','heart','cancer'],
	'uniqueIndexes':['Location'],
	'sliceCols':[['Location']],
	'columnGroups':{
		'spaceColumns':['Location'],
		'labelColumns':['Location']
	},

       'columnDescriptions': dict([(y['COLUMN_NAME'],y['DESCRIPTION']) for y in Y]),
       'source':{
        "agency": {"shortName": "DHHS", "name": "Department of Health and Human Services"},
        "subagency": {"shortName": "CDC", "name": "Centers for Disease Control and Prevention"},
        "topic": {"name": "Health and Nutrition"},
        #"subtopic": {"name": "Release Quantity Data"},
        "program": {"shortName": "CHSI", "name": "Community Health Status Indicators"},
        "dataset": {"shortName": "ohdc", "name": "Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer"}
         }

}
    return metadata
Example #30
def FindPtime(target,Simple=False):
	'''
	Returns the last time, according to runtime metadata, that
	a target was successfully created, if it is created data.
	'''

	metapath = metadatapath(target) + '/CreationRecord.csv'
	if PathExists(metapath):
		try: 
			Data = tb.tabarray(SVfile = metapath,delimiter = ',', lineterminator='\n') 
			if len(Data) > 0:
				Data.sort(order=['TimeStamp'])
				if any(Data['ExitType'] == 'Success'):
					MostRecentSuccess = Data[Data['ExitType'] == 'Success']['TimeStamp'][-1]
					MoreRecentFailures = Data[(Data['ExitType'] == 'Failure') & (Data['TimeStamp'] > MostRecentSuccess)]
					if len(MoreRecentFailures) > 0:
						LeastRecentFailure = MoreRecentFailures['TimeStamp'][0]
					else:
						LeastRecentFailure = numpy.inf
					return Data[(Data['TimeStamp'] >= MostRecentSuccess) & (Data['TimeStamp'] < LeastRecentFailure)]['TimeStamp'][-1] 
				else:
					return numpy.nan
			else:
				return numpy.nan
		except:
			return numpy.nan
		else: pass
	else:
		return numpy.nan			
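
For orientation, a minimal sketch of how FindPtime might be consumed; the target path and the rebuild check below are assumptions for illustration, not part of the original system:

import numpy

# hypothetical usage, assuming this runs alongside the helpers used above
target = '../Data/SomeComputedTable.csv'      # hypothetical created-data target
ptime = FindPtime(target)
needs_rebuild = numpy.isnan(ptime)            # nan means no recorded successful creation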
Example #31
File: exp.py  Project: qbilius/conv-exp
    def create_exp_plan(self):
        """Define each trial's parameters
        """
        df = pandas.read_csv('../data/snodgrass.csv', sep='\t')
        df['imgno'] = range(1, self.trials_per_hit + 1)
        df = pandas.concat([df for i in range(self.nsubj)])
        df['subjid'] = np.repeat(range(self.nsubj), self.trials_per_hit)
        df['order'] = np.hstack([
            np.random.permutation(self.trials_per_hit)
            for i in range(self.nsubj)
        ])
        # df['kind'] = np.repeat(['color', 'gray', 'silhouette'], self.trials_per_hit * self.nsubj // 3)
        df['kind'] = np.repeat(['color'], self.trials_per_hit * self.nsubj)
        df['isi1'] = 500
        df['stim_dur'] = 100
        df['isi2'] = 500
        df['subj_resp'] = None
        df['acc'] = np.nan
        df['rt'] = np.nan

        df = df.sort_values(by=['subjid', 'order'])
        rec = df.to_records(index=False)
        exp_plan = tb.tabarray(array=rec, dtype=rec.dtype)
        if self.save:
            self.save_exp_plan(exp_plan)
        return exp_plan
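
The pandas-to-tabarray hand-off used above (DataFrame -> records -> tabarray) can be isolated into a small sketch; the columns below are made up and stand in for the real trial fields:

import numpy as np
import pandas
import tabular as tb

# hypothetical columns; the real experiment plan has many more fields
df = pandas.DataFrame([[0, 1, np.nan], [0, 0, 0.43], [1, 0, 0.51]],
                      columns=['subjid', 'order', 'rt'])
rec = df.to_records(index=False)
exp_plan = tb.tabarray(array=rec, dtype=rec.dtype)
assert set(exp_plan.dtype.names) == {'subjid', 'order', 'rt'}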
Example #32
File: dataset.py  Project: ardila/skdata
    def fetch(self, wnids, seed=None, num_per_synset=100, firstonly=False, path=os.getcwd(),
              username='******', accesskey='bd662acb4866553500f17babd5992810e0b5a439'):
        """
        Stores a random #num images for synsets specified by wnids from the latest release to path specified
        Since files are stored as tar files online, the entire synset must be downloaded to access random images.

        If 'all' is passed as the num argument, all images are stored.

        If the argument firstonly is set to true, then download times can be reduced by only extracting the first
        few images

        This method overwrites previous fetches: files and metadata are deleted
        """
        if not os.path.exists(path + '/'):
            os.makedirs(path + '/')
        wnids = list(wnids)
        random.seed(seed)
        kept_names = []
        kept_wnid_list = []
        if hasattr(self, '_meta'):
            files_to_remove = np.unique(self.meta['filename'])
            for file_to_remove in files_to_remove:
                try:
                    print path + '/' + file_to_remove
                    os.remove(path + '/' + file_to_remove)
                except OSError:
                    print "metadata is stale, clear cache directory"
        for i, wnid in enumerate(wnids):
            synset_names = []
            url = 'http://www.image-net.org/download/synset?' + \
                  'wnid=' + str(wnid) + \
                  '&username='******'&accesskey=' + accesskey + \
                  '&release=latest'
            print i
            url_file = urlopen(url)
            tar_file = tarfile.open(fileobj=url_file, mode='r|')
            if firstonly and not (num_per_synset == 'all'):
                keep_idx = xrange(num_per_synset)
                for i in keep_idx:
                    tarinfo = tar_file.next()
                    synset_names.append(tarinfo.name)
                    tar_file.extract(tarinfo, path)
            else:
                for tarinfo in tar_file:
                    synset_names.append(tarinfo.name)
                    tar_file.extract(tarinfo, path)
                if num_per_synset == 'all':
                    keep_idx = range(len(synset_names))
                else:
                    keep_idx = sample(range(len(synset_names)), num_per_synset)
                files_to_remove = frozenset(synset_names) - frozenset([synset_names[idx] for idx in keep_idx])
                for file_to_remove in files_to_remove:
                    os.remove(path + '/' + file_to_remove)
            kept_names.extend([synset_names[idx] for idx in keep_idx])
            kept_wnid_list.extend([wnid] * len(keep_idx))
        meta = tb.tabarray(records=zip(kept_names, kept_wnid_list), names=['filename', 'wnid'])
        self._meta = meta
        self.path = path
        tb.io.savebinary('imagenet_meta.npz', self._meta)
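
A hedged usage sketch for the fetch method above, assuming a dataset class that exposes it and valid ImageNet credentials; the class name, wnids, sample size, and cache path are all illustrative assumptions:

# hypothetical usage; every concrete value here is illustrative
dataset = Imagenet()                          # assumed dataset class exposing fetch()
dataset.fetch(['n02084071', 'n02121808'],     # illustrative wnids
              seed=0, num_per_synset=10, firstonly=True,
              path='/tmp/imagenet_cache')
meta = dataset._meta                          # tabarray with 'filename' and 'wnid' columns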
Example #33
def get_alpha_images():
    dataset_obj = objectome64s100alpha(internal_canonical=True)
    dataset_bg = objectome64s100bg(internal_canonical=True)    
    img_res, img_size = 1024, 256
    
    obj_imgs = dataset_obj.get_images({'dtype':'uint8', 'size': (img_res, img_res,4), 'normalize':False, 'mode':'RGBA'}, get_models=True)
    bg_imgs = dataset_bg.get_images({'dtype':'uint8', 'size': (img_res, img_res), 'normalize':False, 'mode':'L'}, get_models=True)
    
    IMGS = []
    alphaval = []
    for i in xrange(obj_imgs.shape[0]):
        background = Image.fromarray(bg_imgs[i])
        foreground = obj_imgs[i]
        tmp_alpha = np.random.uniform(0.25,1)

        foreground[:,:,-1] = foreground[:,:,-1] * tmp_alpha
        alphaval.append(tmp_alpha)
        foreground = Image.fromarray(foreground)
        background.paste(foreground, (0, 0), foreground)
        IMGS.append(np.asarray(background))

    meta = dataset_obj.meta
    names = meta.dtype.names + ('obj_alpha',) 
    formats = zip(*meta.dtype.descr)[1] + ('float',)
    META = tb.tabarray(records=[tuple(meta[i]) + (alphaval[i],) for i in range(len(meta))], names=names, formats=formats)
    
    return IMGS, META
Example #34
def get_monkeyturk_data(dataset="objectome24"):
    if dataset == "objectome24":
        meta_path = "/mindhive/dicarlolab/u/rishir/stimuli/objectome24s100/metadata.pkl"
        data_path = "/mindhive/dicarlolab/u/rishir/monkeyturk/allData.mat"

    meta = pk.load(open(meta_path, "r"))
    datmat = io.loadmat(data_path)
    uobjs = obj.models_combined24

    col_data_seg = {}
    trial_records = []
    subjs = ["Manto", "Zico", "Picasso", "Nano", "Magneto"]
    for sub in subjs:
        x = datmat["allData"][sub][0, 0]
        for xi in range(x.shape[0]):
            s_obj = uobjs[x[xi, 0]]
            d_obj = uobjs[x[xi, 2]]
            resp = uobjs[x[xi, 3]]
            s_id = meta[x[xi, 4] - 1]["id"]
            workid = sub
            assnid = "MonkeyTurk"

            rec_curr = (s_obj,) + (d_obj,) + (resp,) + (s_id,) + (workid,) + (assnid,)
            trial_records.append(rec_curr)

    col_data_seg["all"] = tb.tabarray(records=trial_records, names=KW_NAMES, formats=KW_FORMATS)
    for sub in subjs:
        t = col_data_seg["all"]["WorkerID"] == sub
        col_data_seg[sub] = col_data_seg["all"][t]
    return col_data_seg
Example #35
def parse_lowest_level():
    files_to_parse = utils.ListUnion([[os.path.join(x,y) for y in os.listdir(x)] for x in os.listdir('.') if x.endswith('DETAILS')])

    kvpairs = []
    for file in files_to_parse:
        print(file)
        Soup = BeautifulSoup.BeautifulSoup(open(file))
        bolds = Soup.findAll('b')
        bolds = [b for b in bolds if utils.Contents(b).endswith(':')]
        newkvpairs = [(utils.Contents(b).strip(': '),utils.Contents(b.findNext()).strip()) for b in bolds][:-1]
        if len(bolds) > 0:
            newkvpairs.append((utils.Contents(bolds[-1]).strip(': '),''.join([utils.Contents(x) if utils.Contents(x) != '' else '\n' for x in bolds[-1].findNext().contents])))
    
        kvpairs.append(newkvpairs)

    tb.tabarray(kvpairs = kvpairs).saveSV('final_results.tsv',metadata=True)
Example #36
def download_images_by_synset(
        synsets,
        seed=None,
        num_per_synset='all',
        firstonly=False,
        path=None,
        imagenet_username='******',
        accesskey='bd662acb4866553500f17babd5992810e0b5a439'):
    """
    Stores a random #num images for synsets specified by synsets from the latest release to path specified
    Since files are stored as tar files online, the entire synset must be downloaded to access random images.

    If 'all' is passed as the num argument, all images are stored.

    If the argument firstonly is set to true, then download times can be reduced by only extracting the first
    few images

    Returns a meta tabarray object containing wnid and filename for each downloaded image
    """
    if path is None:
        path = os.getcwd()
    if not os.path.exists(path):
        os.makedirs(path)
    synsets = list(synsets)
    random.seed(seed)
    kept_names = []
    kept_synset_list = []
    for i, synset in enumerate(synsets):
        synset_names = []
        url = 'http://www.image-net.org/download/synset?' + \
              'wnid=' + str(synset) + \
              '&username='******'&accesskey=' + accesskey + \
              '&release=latest'
        print i
        print url
        url_file = urlopen(url)
        tar_file = tarfile.open(fileobj=url_file, mode='r|')
        if firstonly and not (num_per_synset == 'all'):
            keep_idx = xrange(num_per_synset)
            # extract only the first num_per_synset members of the tar stream
            for _ in keep_idx:
                tarinfo = tar_file.next()
                synset_names.append(tarinfo.name)
                tar_file.extract(tarinfo, path)
        else:
            for tarinfo in tar_file:
                synset_names.append(tarinfo.name)
                tar_file.extract(tarinfo, path)
            if num_per_synset == 'all':
                keep_idx = range(len(synset_names))
            else:
                keep_idx = sample(range(len(synset_names)), num_per_synset)
            files_to_remove = frozenset(synset_names) - frozenset(
                [synset_names[idx] for idx in keep_idx])
            for file_to_remove in files_to_remove:
                os.remove(path + '/' + file_to_remove)
        kept_names.extend([synset_names[idx] for idx in keep_idx])
        kept_synset_list.extend([synset] * len(keep_idx))
    meta = tb.tabarray(records=zip(kept_names, kept_synset_list),
                       names=['filename', 'synset'])
    return meta
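
And a corresponding sketch for the function-style variant; again, the synset ids, sample size, and path are illustrative assumptions:

# hypothetical call; requires a valid ImageNet username/accesskey in the function defaults
meta = download_images_by_synset(['n02084071', 'n02121808'],   # illustrative synset ids
                                 seed=0,
                                 num_per_synset=5,
                                 path='/tmp/imagenet_synsets')
# meta is a tabarray with one row per kept image: ('filename', 'synset')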
Example #37
File: api.py  Project: yamins81/vistats
    def get(self):
        meta = tb.tabarray(SVfile=os.path.join(RESULTS_ROOT, 'meta_with_margin_test.tsv'))
        
        N = 3
        days = 4
        text = ''
        NP = 200
        NN = 200
        mdp = meta[:NP]
        mdp = mdp[np.random.RandomState(None).permutation(NP)]['filename']
        mdn = meta[-NN:]
        mdn = mdn[np.random.RandomState(None).permutation(NN)]['filename']

        for d in range(days):
            text += '<div class="entry" id="day_header_%d"><div class="entryheader"><h2 class="entrytitle">Day %d</h2>' % (d, d+1)
            text += '<div class="date">posted by <a href="/vistats_blog">Vistats</a> on 2013.06.19</div></div>'
            ctext = "So, here are my Chanel pics of the day :)"
            text += '<p class="chanel_header" id="chanel_header_day_%d">%s</p><br/>' % (d, ctext)
            chaneltext = '<div class="img_div" id="chanel_img_div_day_%d">' % d + ''.join(['<div class="show_img" id="chanel_img_day_%d_img_%d"><img src="%s/%s"></div>' % (d, _i, IMG_ROOT,x.split('/')[-1]) for _i, x in enumerate(mdp[d*N:(d+1)*N])]) + '</div>'
            text += chaneltext
            notchaneltext = '<div class="img_div" id="not_chanel_img_div_day_%d">' % d + ''.join(['<div class="show_img" id="not_chanel_img_day_%d_img_%d"><img src="%s/%s"></div>' % (d, _i, IMG_ROOT, x.split('/')[-1]) for _i, x in enumerate(mdn[d*N:(d+1)*N])]) + '</div>'
            nctext = "<br/>Hey, and of course I also have a life <b>outside</b> of Chanel :)"
            text += '<p class="not_chanel_header" id="not_chanel_header_day_%d">%s</p><br/>' % (d, nctext) + notchaneltext
            text += '</div>'

        html = HTML_TEMPLATE % text
        
        self.write(html)

        self.finish()
Example #38
def columnwider(narrow_tab): #{{{
    """ Функция для расширения столбцов

    Принимает обычную таблицу, возвращает "раздутую"

    Из-за недоработок класса tabular колонки не могут расширяться динамически,
    поэтому на придется заранее вставить в конец файла поля заведомо
    бОльшей ширины.
    """
    # crate fake row
    first_row = narrow_tab[:1] # возьмем первый ряд для определения типа колонок
    empty_tuple = ()

    for i in first_row.dtype.names:
        if (type(first_row[i][0]).__name__) == 'string_':
            empty_tuple += (column_strut,)
        else:
            empty_tuple +=('',)

    wide_row = tb.tabarray(records=(empty_tuple,), names=list(first_row.dtype.names))

    # now we have a table consisting of one empty wide row;
    # stack it onto the input table
    wide_tab = narrow_tab.rowstack([wide_row])

    # the wide row is no longer needed
    # remove it
    wide_tab = wide_tab[:-1]

    return wide_tab
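
A minimal sketch of the widening workaround described in the docstring, assuming it runs in the same module (so that the column_strut padding constant, defined here only for illustration, is visible to columnwider) and that tabular's rowstack promotes string widths:

import tabular as tb

column_strut = ' ' * 64    # assumed module-level padding constant, defined here for the sketch

narrow = tb.tabarray(records=[('ab', 'x'), ('cd', 'y')], names=['name', 'tag'])
widened = columnwider(narrow)
# widened holds the same rows as narrow, but its string fields are now as wide as
# column_strut, so longer values appended to the saved file later will not be truncated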
Example #39
def HiAssocWords(page, depends_on=root_wf + 'WordFrequencies.csv'):

    CFreq = PageWordFreqs(page)

    if CFreq != None:

        WFreq = tb.tabarray(SVfile=depends_on, verbosity=0)

        WF = WFreq[tb.fast.isin(
            WFreq['Word'],
            CFreq['Word'],
        )]

        CC = CFreq.join(WF, keycols='Word', Names=['InPage', 'Overall'])

        N = float(CFreq['Frequency'].sum())

        DD = (1 /
              N) * CC['Frequency_InPage'] - 10**(-9) * CC['Frequency_Overall']
        s = DD.argsort()[::-1]
        DD = DD[s]
        CC = CC[s]
        CC = CC.addcols(DD, names='FrequencyDelta')

        return CC[['Word', 'Frequency_InPage', 'FrequencyDelta']]

    else:
        return None
Example #40
File: bls.py  Project: govdata/govdata-core
def getcategorydata(code,depends_on = os.path.join(DATA_ROOT ,'BLS_Hierarchy','Manifest_1.tsv')):
    
    manifest = depends_on
    
    X = tb.tabarray(SVfile = manifest)
   
    Codes = np.array([x.split('/')[-2] for x in X['URL']])
        
    x = X[Codes == code][0] 
    topic = str(x['Level1'])
    subtopic = str(x['Level2'])
    xx = str(x['Level3'])
    if len(xx.split(':')) > 1 and '-' in xx.split(':')[1]:
        Dataset = xx.split(':')[0].strip()      
        y = ':'.join(xx.split(':')[1:]).strip('() ')
        ProgramName = y.split('-')[0].strip()
        ProgramAbbr = y.split('-')[1]
    elif xx.strip().endswith(')'):
        Dataset = xx[:xx.find('(')].strip()
        ProgramAbbr = xx[xx.find('('):].strip(' ()')
        if not ProgramAbbr.isupper():
            ProgramAbbr = ''
        ProgramName = ''
    else:
        Dataset = xx
        ProgramName = ''
        ProgramAbbr = ''
    
    if code == 'jt':
        ProgramName = 'JOLTS'
        
        
    return {'Topic':topic,'Subtopic':subtopic,'Dataset':Dataset,'ProgramName':ProgramName,'ProgramAbbr':ProgramAbbr,'DatasetCode':code}
Example #41
def LastTimeChanged(path):
    '''
    Returns the last time, according to runtime metadata, that a file (at "path")
    was actually modified (i.e. not simply overwritten, but actually modified).
    '''

    actualmodtime = os.path.getmtime(path)
    # metapath is not defined in the snippet as scraped; presumably it is the same
    # creation record used by FindPtime above
    metapath = metadatapath(path) + '/CreationRecord.csv'
    if actualmodtime == FindPtime(path):
        try:
            Data = tb.tabarray(SVfile=metapath,
                               delimiter=',',
                               lineterminator='\n')
            if len(Data) > 0:
                Data.sort(order=['TimeStamp'])
                Diffs = Data['Diff'].nonzero()[0]
                if len(Diffs) > 0:
                    return Data['TimeStamp'][Diffs[-1]]
                else:
                    return actualmodtime
            else:
                return actualmodtime
        except:
            return actualmodtime
    else:
        return actualmodtime
Example #42
def get_meta(selected_basic_objs=SELECTED_BASIC_OBJS,
             meta_cars=META_CARS, meta_tanks=META_TANKS,
             meta_basic=META_BASIC):
    """Mix the objectome 64 basic-level set and the car/tank subordinate
    level set"""
    assert len(np.unique(selected_basic_objs)) == 22
    si = [i for i, e in enumerate(meta_basic)
          if e['obj'] in selected_basic_objs]
    assert len(si) == 22 * 1000

    meta = meta_basic[si]
    for meta_subord in [meta_cars, meta_tanks]:
        cnames = list(meta_subord.dtype.names)
        assert list(meta_basic.dtype.names) == cnames
        cnames.remove('internal_canonical')
        cnames.remove('texture')        # contains None
        cnames.remove('texture_mode')   # contains None

        meta = tb.tabarray(
            columns=[np.concatenate([meta[e], meta_subord[e]])
                     for e in cnames],
            names=cnames)

    assert len(meta) == (22 + 30 + 30) * 1000
    # 22 basic + 30 cars + 30 tanks
    assert len(np.unique(meta['obj'])) == 22 + 30 + 30
    return meta, meta_basic, meta_cars, meta_tanks
Example #43
 def test_load_save_TSV_skiprows(self):
     fname = TestDataDir + 'test3.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'])
     X2 = tb.tabarray(SVfile=fname, skiprows=1)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #44
 def test_load_save_TSV_nocomments(self):
     fname = TestDataDir + 'test4.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'], comments='')
     X2 = tb.tabarray(SVfile=fname, headerlines=2)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #45
 def test_usecols(self):
     fname = TestDataDir + 'usecols.tsv'
     self.x.saveSV(fname)
     x = tb.tabarray(SVfile=fname, usecols=[0,-1])
     names=[self.x.dtype.names[i] for i in [0,-1]]
     print x,x.dtype.names
     print self.x[names],names
     self.assert_io(eq(x, self.x[names]), fname)
Example #46
 def test_nohash(self):
     fname = TestDataDir + 'nohash.tsv'
     self.x.saveSV(fname, comments='')
     f = open(fname, 'r').read()
     g = open(fname, 'w')
     g.write('this is my file\n' + f)
     g.close()
     x = tb.tabarray(SVfile=fname, headerlines=2)
     self.assert_io(eq(x, self.x), fname)
Example #47
    def test_linefixer(self):
        fname = TestDataDir + 'linefixer.txt'
        X1 = self.X.copy()
        X1.coloring = {}
        X1.saveSV(fname, delimiter='@')   
        X2 = tb.tabarray(SVfile=fname, 
                         linefixer=(lambda x: x.replace('@','\t')))

        self.assert_io(eq(X1, X2), fname)                      
Example #48
 def setUp(self):
     V1 = ['North', 'South', 'East', 'West']
     V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
     Recs = [(a, b, np.random.rand() * 100, np.random.randint(100)) 
             for a in V1 for b in V2]
     self.X = tb.tabarray(records=Recs,         
                        names=['Region', 'Sector', 'Amount', 'Population'], 
                        coloring={'zoo': ['Region','Sector'], 
                                  'york': ['Population','Sector','Region']})
Example #49
 def setUp(self):
     V1 = ['North', 'South', 'East', 'West']
     V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
     Recs = [(a, b, np.random.rand() * 100, np.random.randint(100000), 
              np.random.rand(), 'Yes' if np.random.rand() < .5 else 'No') 
             for a in V1 for b in V2]
     self.X = tb.tabarray(records=Recs, names=['Region', 'Sector', 'Amount', 
                                  'Population', 'Importance', 'Modernized'])
     self.keycols = ['Region', 'Sector']
     self.others = [['Amount', 'Population'], ['Importance', 'Modernized']]
Example #50
 def parse_imagenet_meta_data(self, results):
     """
     Parses the meta data from tfrecords into a tabarray
     """
     meta_keys = ["labels"]
     meta = {}
     for k in meta_keys:
         if k not in results:
             raise KeyError('Attribute %s not loaded' % k)
         meta[k] = np.concatenate(results[k], axis=0)
     return tb.tabarray(columns=[list(meta[k]) for k in meta_keys], names = meta_keys)
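
To see the parsing step above in isolation, here is a small sketch with a fabricated results dict (real batches would come from tfrecords):

import numpy as np
import tabular as tb

# hypothetical stand-in for the per-batch tfrecords results consumed above
results = {'labels': [np.array([0, 1, 2]), np.array([3, 4])]}
meta_keys = ['labels']
meta = dict((k, np.concatenate(results[k], axis=0)) for k in meta_keys)
labels_tab = tb.tabarray(columns=[list(meta[k]) for k in meta_keys], names=meta_keys)
assert len(labels_tab) == 5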