Example #1
def tabularmetadataforms(pathlist,
                         depends_on=WORKING_DE.relative_metadata_dir):
    attlist = ['description', 'author', 'title', 'keywords']
    recs1 = []
    recs2 = []
    for x in pathlist:
        print x
        mdp = metadatapath(x) + '/ProcessedMetaData.pickle'
        if PathExists(mdp):
            M = pickle.load(open(mdp))
            D = {}
            for att in attlist:
                if att in M.keys():
                    D[att] = M[att]
                else:
                    D[att] = ''
            recs1.append((x, ) +
                         tuple([D[att].replace('\n', ' ') for att in attlist]))
            colnames = M['colnames']
            if 'coldescrs' in M.keys():
                coldescrs = [
                    M['coldescrs'][m] if m in M['coldescrs'].keys() else ''
                    for m in colnames
                ]
            else:
                coldescrs = [''] * len(colnames)

            recs2 += zip([x] * len(colnames), colnames, coldescrs)

    X = tb.tabarray(records=recs1, names=['Path'] + attlist)
    Y = tb.tabarray(records=recs2, names=['Path', 'ColName', 'ColDescr'])

    return [X, Y]
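# Note on the example above: X ends up with one row per input path, carrying the
# flattened 'description', 'author', 'title', and 'keywords' fields, while Y has one
# row per (Path, ColName) pair with its column description (or '' when none is given).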
Example #2
def tabularmetadataforms(pathlist,depends_on = '../System/MetaData/'):
	attlist = ['description','author','title','keywords']
	recs1 = []
	recs2 = []
	for x in pathlist:
		print x
		mdp = metadatapath(x) + '/ProcessedMetaData.pickle'
		if PathExists(mdp):
			M = pickle.load(open(mdp))
			D = {}
			for att in attlist:
				if att in M.keys():
					D[att] = M[att]
				else:
					D[att] = ''
			recs1.append((x,) + tuple([D[att].replace('\n',' ') for att in attlist]))
			colnames = M['colnames']
			if 'coldescrs' in M.keys():
				coldescrs = [M['coldescrs'][m] if m in M['coldescrs'].keys() else ''  for m in colnames]
			else:
				coldescrs = ['']*len(colnames)
			
			recs2 += zip([x]*len(colnames),colnames,coldescrs)		
		
	X = tb.tabarray(records = recs1,names=['Path'] + attlist)
	Y = tb.tabarray(records = recs2,names = ['Path','ColName','ColDescr'])
	
	return [X,Y]
Example #3
def ParseFiles(depends_on=(root_wf + '1To10000.html', root_wf +
                           '10001To20000.html', root_wf + '20001To30000.html',
                           root_wf + '30001To40000.html'),
               creates=root_wf + 'WordFrequencies.csv'):

    Words = []
    Freqs = []
    Rank = []
    for (j, x) in enumerate(depends_on):
        Soup = BeautifulSoup(open(x, 'r'))
        P = Soup.findAll('p')
        count = 0
        for (i, p) in enumerate(P):
            print 'processing', x, ', group', i
            A = p.findAll('a')
            if len(A) > 10:
                C = Contents(p).replace(' = ', ' ').split(' ')
                newwords = C[::2]
                newfreqs = C[1::2]
                Words += newwords
                Freqs += newfreqs
                Rank += range(1 + j * 10000 + count,
                              1 + j * 10000 + count + len(newwords))
                count += len(newwords)

    tb.tabarray(columns=[Words, Freqs, Rank],
                names=['Word', 'Frequency', 'Rank']).saveSV(creates,
                                                            delimiter=',')
Example #4
def _normalize_array(x):
    flour = tb.tabarray(columns=[x['flour_all'] + x['flour_almond'] + \
            x['flour_bread'] + x['flour_cake'] + x['flour_other'] + \
            x['flour_rice'] + x['flour_wheat'] + x['flour_rye']], \
            names=['flour'])

    liquid = tb.tabarray(columns=[x['milk'] + x['water'] + \
            x['soymilk'] + x['buttermilk'] + x['juice_apple'] + \
            x['juice_can'] + x['juice_lemon'] + x['juice_lime'] + \
            x['juice_orange'] + x['juice_other'] + x['juice_pineapple']], \
            names=['liquid'])

    fat = tb.tabarray(columns=[x['butter'] + x['cream_cheese'] + \
            x['cream_half'] + x['cream_ice'] + x['cream_other'] + \
            x['cream_tartar'] + x['cream_whipped'] + x['margarine'] + \
            x['oil_canola'] + x['oil_olive'] + x['oil_other'] + \
            x['oil_vegetable'] + x['lard'] + x['shortening']], \
            names=['fat'])

    sugar = tb.tabarray(columns=[x['sugar_brown'] + x['sugar_powder'] + \
            x['sugar_white']], names=['sugar'])

    a = x[['egg']].colstack(fat).colstack(flour).colstack(liquid)\
            .colstack(sugar).extract()
    a = a / np.repeat(a.sum(axis = 1), a.shape[1])\
            .reshape(a.shape[0], a.shape[1])
    return a
Example #5
def make_background_db(
    creates="../background_certificate.txt", depends_on=("../3d_hdr_backgrounds.csv", "../2d_grayscale_backgrounds.csv")
):

    conn = pm.Connection()
    db = conn["dicarlocox_3dmodels"]
    db.drop_collection("3d_spherical_backgrounds")

    coll = db["3d_spherical_backgrounds"]

    recs = [
        {"name": "backlot", "path": "backlot.tdl"},
        {"name": "apartment", "path": "apartment.tdl"},
        {"name": "empty room", "path": "empty_room.tdl"},
        {"name": "office", "path": "office.tdl"},
    ]

    for rec in recs:
        coll.insert(rec)

    X = tb.tabarray(SVfile=depends_on[0])
    recs = [{"name": x["Path"][:-4], "path": x["Path"], "description": x["Description"], "type": "3d hdr"} for x in X]
    for rec in recs:
        coll.insert(rec)

    X = tb.tabarray(SVfile=depends_on[1])
    recs = [{"name": x["Path"][:-4], "path": x["Path"], "type": "2d grayscale"} for x in X]
    for rec in recs:
        coll.insert(rec)

    rec = {"name": "blank gray image", "path": "gray.td", "type": "blank"}
    coll.insert(rec)
Example #6
def TestPivot2():
    X = tb.tabarray(records=[('x', 1, 3, 6), ('y', 0, 3, 1), ('x', 0, 3, 5)], 
                    names=['a', 'b', 'c', 'd'])
    Y = X.pivot('b', 'a')
    Z = tb.tabarray(records=[(0, 3, 3, 5, 1), (1, 3, 0, 6, 0)], 
                    names=['b', 'x_c', 'y_c', 'x_d', 'y_d'])
    assert (Y == Z).all()
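# Note: X.pivot('b', 'a') keys the output rows on the unique values of 'b' and, for each
# remaining column, creates one '<a-value>_<column>' field (here x_c, y_c, x_d, y_d),
# which is exactly the layout the expected tabarray Z spells out.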
Example #7
def specific_config_gen(IC,args):
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile = args['frame_correspondence'])
    IC.size = args['size']
    IC.prefix = prefix = args.get('image_extension','.jpg')
    IC.current_frame_path = None
    csvs = [x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv')]
    csvs.sort()
    Xs = [tb.tabarray(SVfile = os.path.join(IC.annotate_dir,csv)) for csv in csvs]
    cns = [csv.split('.')[0] for csv in csvs]
    cns = [[cn]*len(X) for (cn,X) in zip(cns,Xs)]
    Xs = [X.addcols(cn,names=['clip_num']) for (cn,X) in zip(cns,Xs)]

    csvs = [x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv')]
    csvs.sort()
    Gs = []
    fields = ['clip_num','Frame'] + xfields + yfields
    for ind,csv in enumerate(csvs):
        try:
            g = tb.tabarray(SVfile = os.path.join(IC.groundtruth_dir,csv))
        except:
            x = Xs[ind].addcols([-1]*len(Xs[ind]),names=['Correctness'])
        else:
            g = g.addcols([csv.split('.')[0]]*len(g),names = ['clip_num'])
            g = g[fields + ['Confidence']]
            g.renamecol('Confidence','Correctness')
            x = Xs[ind].join(g,keycols=fields)
        Gs.append(x)
    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num','Frame'])
    
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num','Frame']],X[['clip_num','Frame']])
    Y = Y[F]
    X = X.join(Y,keycols=['clip_num','Frame'])

    params = []
    for t in X:
        print(t)  
        cn = t['clip_num']
        fr = t['Frame']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc,yc = bb.center
        center = correct_center((xc,yc),IC.size,(1920,1080))
        bb_new = bbox.BoundingBox(center = center,width = IC.size[0], height = IC.size[1])
        p = SON([('size',IC.size),
                     ('bounding_box',SON([('xfields',list(bb_new.xs)),('yfields',list(bb_new.ys))])),
                     ('original_bounding_box',SON([('xfields',list(bb.xs)),('yfields',list(bb.ys))])),
                     ('clip_num',cn),
                     ('Frame',int(t['Original'])),
                     ('base_dir',IC.base_dir),
                     ('correctness',int(t['Correctness']))])
        p.update(box)
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image',p)]))
    return params
Example #8
def apply_binary(din='../data/compas',
                 froot='compas',
                 neg_names=None,
                 prefix=''):

    ftrain = os.path.join(din, '%s%s_train.out' % (prefix, froot))
    ftrain_label = os.path.join(din, '%s%s_train.label' % (prefix, froot))
    ftest = os.path.join(din, '%s_test.csv' % froot)
    fout = os.path.join(din, '%s%s_test.out' % (prefix, froot))
    flabel = os.path.join(din, '%s%s_test.label' % (prefix, froot))

    x = tb.tabarray(SVfile=ftest)

    if (neg_names is None):
        neg_names = [n.replace(':', ':not-') for n in x.dtype.names]

    y = tb.tabarray(array=(1 - x.extract()), names=neg_names)

    names = list(x.dtype.names) + list(y.dtype.names)
    positive_label_name = x.dtype.names[-1]
    negative_label_name = y.dtype.names[-1]

    x = x.colstack(y)
    recs = [x[negative_label_name], x[positive_label_name]]

    x = x.extract()
    d = dict(zip(names, [x[:, i] for i in range(len(names))]))

    print 'reading rules from', ftrain
    rule_descr = [
        line.strip().split()[0]
        for line in open(ftrain, 'rU').read().strip().split('\n')
    ]

    print 'extracting these rules from', ftest
    out = []
    for descr in rule_descr:
        rule = [clause for clause in descr.strip('{}').split(',')]
        bv = np.cast[str](np.cast[int](np.array([
            (d[name] == 1) for name in rule
        ]).all(axis=0)))
        out.append('%s %s' % (descr, ' '.join(bv)))

    print 'writing', fout
    f = open(fout, 'w')
    f.write('\n'.join(out) + '\n')
    f.close()

    print 'writing', flabel
    labels = [
        line.split()[0]
        for line in open(ftrain_label, 'rU').read().strip().split('\n')
    ]
    f = open(flabel, 'w')
    f.write('\n'.join([
        '{%s} %s' % (l, ' '.join(np.cast[str](np.cast[int](r))))
        for (l, r) in zip(labels, recs)
    ]) + '\n')
    f.close()
Example #9
def test_bionumbers():
    X = tb.tabarray(SVfile = 'tests/bionumbers.txt') 
    fname = TestDataDir + 'bionumbers.txt'
    X.saveSV(fname, quotechar="'")
    Y = tb.tabarray(SVfile = TestDataDir + 'bionumbers.txt',quotechar="'")
    names = ('ID', 'Property', 'Organism', 'Value', 'Units', 'Range', 
              'NumericalValue', 'Version')               
    assert_bio(X.dtype.names == names and len(X) == 4615 and eq(X,Y), fname)
Example #10
 def test_missingvals4(self):
     fname = TestDataDir + 'missingvals4.csv'
     F = open(fname,'w')
     F.write('Name,Age,Gender\nDaniel,12,M\nElaine,'',F\nFarish,46,')
     F.close()
     X = tb.tabarray(SVfile=fname)
     X2 = tb.tabarray(records=[('Daniel', 12, 'M'), ('Elaine', np.nan, 'F'), ('Farish', 46, '')],names=['Name','Age','Gender'])
     self.assert_io(eq(X, X2), fname)                        
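# The comparison above relies on the SVfile loader filling omitted numeric cells with
# np.nan and omitted string cells with the empty string when it infers column types.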
Example #11
def makemetadata(code,datadir,outfile1,outfile2,depends_on = (resource_root + 'ProcessedManifest_2_HandAdditions.tsv',resource_root + 'Keywords.txt')):

    Z  = {}

    keyword_file = depends_on[1]
    Y = tb.tabarray(SVfile = keyword_file)[['Code','Keywords']]
    y = Y[Y['Code'] == code]
    Z['keywords'] = [x.strip() for x in str(y['Keywords'][0]).split(',')]
    
    
    dirl = np.array(listdir(datadir))
    
    pr = lambda x : x.split('!')[-1][:-4]
    p=re.compile('\([^\)]*\)')
    
    tps = [l for l in dirl if l.endswith('.txt.txt')]
    if tps:
        textpath = datadir + tps[0]
        [SD,things] = ParseTexts(textpath,code)
        FNs = [p.sub('',things[pr(y).lower()]).replace(' ,',',').replace(',,',',') if pr(y).lower() in things.keys() else '' for y in dirl]
        FNs = [z.split('=')[1] if '=' in z and not ' =' in z else z for z in FNs]
    else:
        SD = ''
        FNs = len(dirl)*['']
        
    Z['description'] = SD
    
    cfs = [l for l in dirl if l.endswith('.contacts.txt')]
    if cfs:
        contactfile = datadir + cfs[0]
        ctext = open(contactfile,'rU').read().strip()
        if '<html>' in ctext.lower():
            clines = ctext.split('\n')
            fb = [i for i in range(len(clines)) if clines[i].strip() == ''][0]
            ctext = '\n'.join(clines[fb+1:])
        ctext = ctext.strip(' *\n').replace('\n\n','\n')    
    else:
        ctext = ''
        
    Z['contactInfo'] = ctext
    f = open(outfile1,'w')
    pickle.dump(Z,f)
    f.close()

    Y = tb.tabarray(SVfile = depends_on[0])
    Y.sort(order = ['File'])

    
    dirlp = np.array([pr(y) for y in dirl])
    [A,B] = tb.fast.equalspairs(dirlp,Y['File'])
    if (B>A).any():
        print 'adding hand-made content to', dirlp[B>A]
        for k in (B>A).nonzero()[0]:
            FNs[k] = Y['FileName'][A[k]]    
    
    D = tb.tabarray(columns=[dirl,FNs], names = ['Path','FileName'])
    
    D.saveSV(outfile2,metadata = True)  
Example #12
def TestReplace2():
    V1 = ['North', 'South', 'East', 'West']
    V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
    Recs = [(a, b, np.random.rand() * 100, np.random.randint(100000)) 
                                                       for a in V1 for b in V2]
    X = tb.tabarray(records=Recs, 
                    names=['Region', 'Sector', 'Amount', 'Population'])
    X2 = tb.tabarray(records=Recs, 
                     names=['Region', 'Sector', 'Amount', 'Population'])

    X.replace('S', 'M')
    assert((X == X2).all())
Example #13
def get_subclass_pages():
    X = tb.tabarray(SVfile = 'catlevels.tsv')
    recs = []
    p = re.compile('Sub\d')
    f = lambda x : p.match(dict(x.attrs).get('class',''))
    for x in X[:]:
        subrecs = []
        cat = x['CLASS']
        fixed_cat = fix_cat(cat)
        title = x['CLASS TITLE']
        os.system('wget http://www.uspto.gov/web/patents/classification/uspc' + fixed_cat + '/sched' + fixed_cat + '.htm -O ' + cat + '.html')
        Soup = BeautifulSoup.BeautifulSoup(open(cat + '.html'))
        Crac = Soup.find(True,'CracHeader')
        For = Soup.find(True,'ForHeader')
        Dig = Soup.find(True,'DigHeader')
        if Crac:
            end = Crac
        elif For:
            end = For
        elif Dig:
            end = Dig
        else:
            end  = None
        if end:
            T = end.findAllPrevious('tr',valign='top')[:]
        else:
            T = Soup.findAll('tr',valign='top')[:]
        T.reverse()
        for (i,t) in enumerate(T): 
            try:
                subclass = str(Contents(t.find(f)).replace('&nbsp;','').strip())
            except:
                pass
            else:
                try:
                    subtitle = Contents(t.find(True,"SubTtl")).strip()
                except:
                    pass
                else:
                    try:
                        indent = int(dict(t.find(True,"SubTtl").find("img").attrs)['src'].split('/')[-1].split('_')[0])
                    except AttributeError:
                        indent = 0
                    #print (cat,title,subclass,subtitle,indent)    
                    subrecs.append((cat,title,subclass,subtitle,indent))
        subrecs.reverse()
        recs.extend(subrecs)

    Y = tb.tabarray(records = recs, names=['Class','Title','Subclass','Subtitle','Indent'],formats=['str','str','str','str','int'])
    Y.saveSV('classifications.tsv',metadata=True)
Example #14
def TestPivot3():
    V1 = ['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia', 'Australia', 
          'Africa', 'Antarctica']
    V1.sort()
    V2 = ['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment', 'Taxes']
    V2.sort()
    Recs = [(a, b, 100 * np.random.rand()) for a in V1 for b in V2]
    X = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source')
    Z = utils.uniqify(X['Source'])
    Z.sort()
    Cols = [[y['Amount'] for y in X if y['Source'] == b] for b in Z]
    W = tb.tabarray(columns=[V1] + Cols, 
                    names=['Region'] + [b + '_Amount' for b in Z])
    assert (W == Y).all()
Example #15
def TestPivot4():
    V1 = ['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia', 'Australia', 'Africa', 'Antarctica']
    V1.sort()
    V2 = ['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment', 'Taxes']
    V2.sort()
    Recs = [(a, b, 100 * np.random.rand()) for a in V1 for b in V2]
    X = tb.tabarray(records=Recs[:-1],
                    names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source', 
                NullVals=dict([(o,-999) for o in X.dtype.names]))
    X2 = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y2 = X2.pivot('Region', 'Source')
    Y2[V2[-1] + '_Amount'][-1] = -999

    assert (Y == Y2).all()
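# Here NullVals supplies the fill value (-999) for (Region, Source) combinations that are
# missing from X; since X drops the last record of Recs, patching the matching cell of Y2
# with -999 is what makes the two pivots comparable.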
Example #16
 def test_load_save_TSV_infer2(self):
     fname = TestDataDir + 'test2.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'])
     X2 = tb.tabarray(SVfile=fname, 
                      metametadata={'coloring': 0, 'names': 1})
     self.assert_io(eq(self.X, X2), fname)
Example #17
def get_results(mean,std,ext_hash,splitfilename,outfile):
    conn = pm.Connection(document_class = SON)
    db = conn['thor']
    fcol = db['features.files']
    split_fs = gridfs.GridFS(db,'split_performance')
    fh = split_fs.get_version(splitfilename)
    r = cPickle.loads(fh.read())
    r = r['split_result']['cls_data']
    weights = r['coef']
    bias = r['intercept']
    L = fcol.find({'__hash__':ext_hash},fields=['image.clip_num','image.Frame','feature','image.bounding_box'])
    recs = []
    names = ['clip_num','frame','x1','x2','x3','x4','y1','y2','y3','y4'] + labels    
    for l in L:
        cn = str(l['image']['clip_num'])
        fr = l['image']['Frame']
        print(l['_id'],cn,fr)
        bx = l['image']['bounding_box']['xfields']
        by = l['image']['bounding_box']['yfields']
        feat = l['feature']
        feat = (feat - mean)/std
        m = sp.dot(feat,weights) + bias
        rec = (cn,fr,) + tuple(bx) + tuple(by) + tuple(m)
        recs.append(rec)
        if len(recs) == 10000:
            X = tb.tabarray(records = recs, names = names)
            tb.io.appendSV(outfile,X,metadata=True)
            recs = []
Example #18
 def setUp(self):
     self.D = tb.tabarray(
              array=[(2, 'a', 2, 'cc', 3.0), (2, 'b', 5, 'dcc', 2.0), 
                     (7, 'e', 2, 'cjc', 8.0), (2, 'e', 2, 'ccj', 3.0)], 
              names=['a', 'c', 'b', 'd', 'e'], formats='i4,|S1,i4,|S3,f8', 
              coloring={'moo': ['a', 'b'], 'boo': ['a', 'd', 'e']})
     self.Root = 'basic'
Example #19
 def setUp(self):
     names = ['name', 'ID', 'color', 'size', 'June', 'July']
     data = [('bork', 1212, 'blue', 'big', 45.32, 46.07), 
             ('mork', 4660, 'green', 'small', 32.18, 32.75), 
             ('stork', 2219, 'red', 'huge', 60.93, 61.82), 
             ('lork', 4488, 'purple', 'tiny', 0.44, 0.38)]
     self.x = tb.tabarray(records=data, names=names)
Example #20
 def test_toload_redundant_tsv(self):
     toload = ['a', 'boo']
     fname = TestDataDir + self.Root + '6.tsv'
     self.D.saveSV(fname, metadata=['names', 'formats', 'types', 'coloring', 'dialect'])
     D = tb.tabarray(SVfile=fname, usecols=toload)
     assert set(D.dtype.names) == set(D.coloring['boo'])
     self.assert_io(eq(self.D[toload], D[toload]), fname)
Example #21
    def test_strictjoin3(self):
        X = self.X
        keycols = self.keycols
        others=self.others
        X1 = X[:(3 * len(X) / 4)][keycols + others[0]]
        X2 = X[(len(X) / 4):][keycols + others[1]]
        Y = spreadsheet.strictjoin([X1, X2], self.keycols)
        Y.sort(order=keycols)

        nvf = utils.DEFAULT_NULLVALUEFORMAT
        nvf1 = nvf(X[others[1][0]].dtype.descr[0][1])
        nvf2 = nvf(X[others[1][1]].dtype.descr[0][1])
        nvf3 = nvf(X[others[0][0]].dtype.descr[0][1])
        nvf4 = nvf(X[others[0][1]].dtype.descr[0][1])

        Recs = ([(a, b, c, d, nvf1, nvf2) for (a, b, c, d, e, f) 
                                         in X[:(len(X) / 4)]] + 
                [(a, b, c, d, e, f) for (a, b, c, d, e, f) 
                                   in X[(len(X) / 4):(3 * len(X) / 4)]] + 
                [(a, b, nvf3, nvf4, e, f) for (a, b, c, d, e, f) 
                                         in X[(3 * len(X) / 4):]])
        Z = tb.tabarray(records=Recs, names=X.dtype.names)
        Z.sort(order=self.keycols)

        self.assert_((Y == Z).all())
Example #22
 def test_load_save_CSV_infer(self):
     fname = TestDataDir + 'test.csv'
     self.X.saveSV(fname)
     X2 = tb.tabarray(SVfile=fname)  # normal scenario: names, no comments
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #23
def applysplitter(manifest,splitdir):
	MakeDir(splitdir)
	M = tb.tabarray(SVfile = manifest)
	vals = tb.uniqify(M['Prefix'])
	for v in vals:
		Mv = M[M['Prefix'] == v]
		Mv.saveSV(splitdir + 'Manifest_' + pathprocessor([v]) + '.tsv', metadata=True)
Example #24
 def test_load_save_TSV_infer(self):
     fname = TestDataDir + 'test.tsv'
     self.X.saveSV(fname)
     X2 = tb.tabarray(SVfile=fname)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #25
def get_meta(selected_basic_objs=SELECTED_BASIC_OBJS):
    """Mix the objectome 64 basic-level set and the chair subordinate
    level set"""
    assert len(np.unique(selected_basic_objs)) == 30
    meta_chairs = pk.load(open('meta_objt_chairs_subord_v3.pkl'))
    meta_basic = pk.load(open('meta_objt_full_64objs.pkl'))

    si = [
        i for i, e in enumerate(meta_basic) if e['obj'] in selected_basic_objs
    ]
    assert len(si) == 30 * 1000

    cnames = list(meta_chairs.dtype.names)
    assert list(meta_basic.dtype.names) == cnames
    cnames.remove('internal_canonical')
    cnames.remove('texture')  # contains None
    cnames.remove('texture_mode')  # contains None

    meta = tb.tabarray(columns=[
        np.concatenate([meta_basic[e][si], meta_chairs[e]]) for e in cnames
    ],
                       names=cnames)
    assert len(meta) == 30 * 1000 * 2
    assert len(np.unique(meta['obj'])) == 60  # 30 non-chairs + 30 chairs
    return meta, meta_basic, meta_chairs
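# The tb.tabarray(columns=..., names=cnames) call builds the mixed metadata by
# concatenating, field by field, the selected basic-set rows with the chair rows, so both
# image sets share a single schema after the 'internal_canonical', 'texture', and
# 'texture_mode' columns are dropped.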
Example #26
def check_stamps(metadatafile,imagedir,train_frames,outdir):
    os.mkdir(outdir)
    metadata = tb.tabarray(SVfile=metadatafile)
    #get labels for training objects

    train_labels_inds = []
    for cn,fr in train_frames:
        inds = ((metadata['Frame'] == fr) & (metadata['clip_num'] == cn) & (metadata['ObjectType'] != 'DCR')).nonzero()[0]
        #ind = inds[t['object_number']]
        train_labels_inds.extend(inds)
    train_labels = metadata[train_labels_inds]
    #get stamps for training objects

    train_points = []
    train_points_labels = []
    sizes = []
    num_train = 0
    for label in train_labels:
        lbl = label['clip_num'] + '_' + str(label['Frame']) + '.jpg'
        print(label)
        framefile = os.path.join(imagedir,lbl)
        im = get_image(framefile)

        box = bbox.BoundingBox(xs = [label[xf] for xf in xfields],
                               ys = [label[yf] for yf in yfields])
        stamp = bbox.stamp(im,box,stamp_shape=(200,200))[0]
        if stamp is not None:
            img = Image.fromarray(stamp)
            img.save(os.path.join(outdir,str(num_train) + '.jpg'))
            num_train += 1
Example #27
def parser2():
    files_to_parse = [x for x in os.listdir('.') if x.endswith('_file.html')]
    
    for file in files_to_parse:
        print('parsing',file)
        #next step of getting data for each water system
        H20Systems = BeautifulSoup.BeautifulSoup(open(file))
        table = H20Systems.findAll('table')[3].findAll('table')[7]
        TR = table.findAll('tr',bgcolor='#F5F5F5')
        Links = [str(dict(tr.findAll('td')[1].findAll('a')[0].attrs)['href']) for tr in TR]
        Names = [utils.Contents(tr.findAll('td')[1]) for tr in TR]
        Number = [utils.Contents(tr.findAll('td')[2]).replace('&nbsp;',' ') for tr in TR]
        County = [utils.Contents(tr.findAll('td')[3]) for tr in TR]
        outname = file.split('_')[0] + '_file.tsv'

        tb.tabarray(columns = [Links,Names,Number,County],names=['Links','Names','Number','County']).saveSV(outname,metadata=True)
Example #28
    def get_data(self):

        trial_records = []
        obj_oi = np.unique(self.meta["obj"])
        img_oi = np.unique(self.meta["id"])

        for subj in self.data:
            for r, i, ss in zip(subj["Response"], subj["ImgData"], subj["StimShown"]):
                if len(i) > 1:
                    s = i["Sample"]
                    t = i["Test"]
                    s_id = s["id"]
                    s_obj = s["obj"]
                    t_obj = [t_["obj"] for t_ in t]
                    d_obj = [t_ for t_ in t_obj if t_ != s_obj][0]
                    resp = t_obj[r]
                else:  # backwards compatibility with previous mturkutils
                    s_id = i[0]["id"]
                    s_obj = i[0]["obj"]
                    t_obj = [strip_objectomeFileName(fn) for fn in ss[1:]]
                    d_obj = [t_ for t_ in t_obj if t_ != s_obj][0]
                    resp = strip_objectomeFileName(r)

                if (s_id in img_oi) & (d_obj in obj_oi):
                    rec_curr = (s_obj,) + (d_obj,) + (resp,) + (s_id,) + (subj["WorkerID"],) + (subj["AssignmentID"],)
                    trial_records.append(rec_curr)

        self.trials = tb.tabarray(records=trial_records, names=self.trial_kw_names, formats=self.trial_kw_formats)
        return
Example #29
def create_metadata():
# Metadata
    name = 'riskfactors'
    source = {
	"agency": {"shortName": "DHHS", "name": "Department of Health and Human Services"},
	"subagency": {"shortName": "CDC", "name": "Centers for Disease Control and Prevention"},
	"topic": {"name": "Health and Nutrition"},
	#"subtopic": {"name": "Release Quantity Data"},
	"program": {"shortName": "CHSI", "name": "Community Health Status Indicators"},
	"dataset": {"shortName": "ohdc", "name": "Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer"}
}
    Y = tb.tabarray(SVfile = 'DATAELEMENTDESCRIPTION.csv')
    metadata = {
	'title':'Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer',
	'description':"Community Health Status Indicators (CHSI) to combat obesity, heart disease, and cancer are major components of the Community Health Data Initiative. This dataset provides key health indicators for local communities and encourages dialogue about actions that can be taken to improve community health (e.g., obesity, heart disease, cancer). The CHSI report and dataset was designed not only for public health professionals but also for members of the community who are interested in the health of their community. The CHSI report contains over 200 measures for each of the 3,141 United States counties. Although CHSI presents indicators like deaths due to heart disease and cancer, it is imperative to understand that behavioral factors such as obesity, tobacco use, diet, physical activity, alcohol and drug use, sexual behavior and others substantially contribute to these deaths.",
	'keywords':['Obesity','CHSI','health','data','community','indicators','interventions','performance','measurable','life expectancy','mortality','disease','prevalence','risk','factors','behaviors','socioeconomic','environments','access','cost','quality','warehouse','heart','cancer'],
	'uniqueIndexes':['Location'],
	'sliceCols':[['Location']],
	'columnGroups':{
		'spaceColumns':['Location'],
		'labelColumns':['Location']
	},

       'columnDescriptions': dict([(y['COLUMN_NAME'],y['DESCRIPTION']) for y in Y]),
       'source':{
        "agency": {"shortName": "DHHS", "name": "Department of Health and Human Services"},
        "subagency": {"shortName": "CDC", "name": "Centers for Disease Control and Prevention"},
        "topic": {"name": "Health and Nutrition"},
        #"subtopic": {"name": "Release Quantity Data"},
        "program": {"shortName": "CHSI", "name": "Community Health Status Indicators"},
        "dataset": {"shortName": "ohdc", "name": "Community Health Status Indicators (CHSI) to Combat Obesity, Heart Disease and Cancer"}
         }

}
    return metadata
Example #30
def FindPtime(target,Simple=False):
	'''
	Returns the last time, according to runtime metadata, that
	a target was successfully created, if it is created data.
	'''

	metapath = metadatapath(target) + '/CreationRecord.csv'
	if PathExists(metapath):
		try: 
			Data = tb.tabarray(SVfile = metapath,delimiter = ',', lineterminator='\n') 
			if len(Data) > 0:
				Data.sort(order=['TimeStamp'])
				if any(Data['ExitType'] == 'Success'):
					MostRecentSuccess = Data[Data['ExitType'] == 'Success']['TimeStamp'][-1]
					MoreRecentFailures = Data[(Data['ExitType'] == 'Failure') & (Data['TimeStamp'] > MostRecentSuccess)]
					if len(MoreRecentFailures) > 0:
						LeastRecentFailure = MoreRecentFailures['TimeStamp'][0]
					else:
						LeastRecentFailure = numpy.inf
					return Data[(Data['TimeStamp'] >= MostRecentSuccess) & (Data['TimeStamp'] < LeastRecentFailure)]['TimeStamp'][-1] 
				else:
					return numpy.nan
			else:
				return numpy.nan
		except:
			return numpy.nan
		else: pass
	else:
		return numpy.nan			
Example #31
    def create_exp_plan(self):
        """Define each trial's parameters
        """
        df = pandas.read_csv('../data/snodgrass.csv', sep='\t')
        df['imgno'] = range(1, self.trials_per_hit + 1)
        df = pandas.concat([df for i in range(self.nsubj)])
        df['subjid'] = np.repeat(range(self.nsubj), self.trials_per_hit)
        df['order'] = np.hstack([
            np.random.permutation(self.trials_per_hit)
            for i in range(self.nsubj)
        ])
        # df['kind'] = np.repeat(['color', 'gray', 'silhouette'], self.trials_per_hit * self.nsubj // 3)
        df['kind'] = np.repeat(['color'], self.trials_per_hit * self.nsubj)
        df['isi1'] = 500
        df['stim_dur'] = 100
        df['isi2'] = 500
        df['subj_resp'] = None
        df['acc'] = np.nan
        df['rt'] = np.nan

        df = df.sort_values(by=['subjid', 'order'])
        rec = df.to_records(index=False)
        exp_plan = tb.tabarray(array=rec, dtype=rec.dtype)
        if self.save:
            self.save_exp_plan(exp_plan)
        return exp_plan
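# A pandas DataFrame converts to a tabarray through its record-array view:
# to_records(index=False) yields a numpy recarray whose dtype is passed straight through
# via array=rec, dtype=rec.dtype.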
Example #32
    def fetch(self, wnids, seed=None, num_per_synset=100, firstonly=False, path=os.getcwd(),
              username='******', accesskey='bd662acb4866553500f17babd5992810e0b5a439'):
        """
        Stores num_per_synset randomly chosen images for the synsets specified by wnids,
        from the latest release, under the specified path.
        Since files are stored as tar files online, the entire synset must be downloaded to access random images.

        If 'all' is passed as num_per_synset, all images are stored.

        If the argument firstonly is set to True, download times can be reduced by extracting only the first
        few images.

        This method overwrites previous fetches: files and metadata are deleted.
        """
        if not os.path.exists(path + '/'):
            os.makedirs(path + '/')
        wnids = list(wnids)
        random.seed(seed)
        kept_names = []
        kept_wnid_list = []
        if hasattr(self, '_meta'):
            files_to_remove = np.unique(self.meta['filename'])
            for file_to_remove in files_to_remove:
                try:
                    print path + '/' + file_to_remove
                    os.remove(path + '/' + file_to_remove)
                except OSError:
                    print "metadata is stale, clear cache directory"
        for i, wnid in enumerate(wnids):
            synset_names = []
            url = 'http://www.image-net.org/download/synset?' + \
                  'wnid=' + str(wnid) + \
                  '&username=' + username + '&accesskey=' + accesskey + \
                  '&release=latest'
            print i
            url_file = urlopen(url)
            tar_file = tarfile.open(fileobj=url_file, mode='r|')
            if firstonly and not (num_per_synset == 'all'):
                keep_idx = xrange(num_per_synset)
                for i in keep_idx:
                    tarinfo = tar_file.next()
                    synset_names.append(tarinfo.name)
                    tar_file.extract(tarinfo, path)
            else:
                for tarinfo in tar_file:
                    synset_names.append(tarinfo.name)
                    tar_file.extract(tarinfo, path)
                if num_per_synset == 'all':
                    keep_idx = range(len(synset_names))
                else:
                    keep_idx = sample(range(len(synset_names)), num_per_synset)
                files_to_remove = frozenset(synset_names) - frozenset([synset_names[idx] for idx in keep_idx])
                for file_to_remove in files_to_remove:
                    os.remove(path + '/' + file_to_remove)
            kept_names.extend([synset_names[idx] for idx in keep_idx])
            kept_wnid_list.extend([wnid] * len(keep_idx))
        meta = tb.tabarray(records=zip(kept_names, kept_wnid_list), names=['filename', 'wnid'])
        self._meta = meta
        self.path = path
        tb.io.savebinary('imagenet_meta.npz', self._meta)
Example #33
def get_alpha_images():
    dataset_obj = objectome64s100alpha(internal_canonical=True)
    dataset_bg = objectome64s100bg(internal_canonical=True)    
    img_res, img_size = 1024, 256
    
    obj_imgs = dataset_obj.get_images({'dtype':'uint8', 'size': (img_res, img_res,4), 'normalize':False, 'mode':'RGBA'}, get_models=True)
    bg_imgs = dataset_bg.get_images({'dtype':'uint8', 'size': (img_res, img_res), 'normalize':False, 'mode':'L'}, get_models=True)
    
    IMGS = []
    alphaval = []
    for i in xrange(obj_imgs.shape[0]):
        background = Image.fromarray(bg_imgs[i])
        foreground = obj_imgs[i]
        tmp_alpha = np.random.uniform(0.25,1)

        foreground[:,:,-1] = foreground[:,:,-1] * tmp_alpha
        alphaval.append(tmp_alpha)
        foreground = Image.fromarray(foreground)
        background.paste(foreground, (0, 0), foreground)
        IMGS.append(np.asarray(background))

    meta = dataset_obj.meta
    names = meta.dtype.names + ('obj_alpha',) 
    formats = zip(*meta.dtype.descr)[1] + ('float',)
    META = tb.tabarray(records=[tuple(meta[i]) + (alphaval[i],) for i in range(len(meta))], names=names, formats=formats)
    
    return IMGS, META
Example #34
def get_monkeyturk_data(dataset="objectome24"):
    if dataset == "objectome24":
        meta_path = "/mindhive/dicarlolab/u/rishir/stimuli/objectome24s100/metadata.pkl"
        data_path = "/mindhive/dicarlolab/u/rishir/monkeyturk/allData.mat"

    meta = pk.load(open(meta_path, "r"))
    datmat = io.loadmat(data_path)
    uobjs = obj.models_combined24

    col_data_seg = {}
    trial_records = []
    subjs = ["Manto", "Zico", "Picasso", "Nano", "Magneto"]
    for sub in subjs:
        x = datmat["allData"][sub][0, 0]
        for xi in range(x.shape[0]):
            s_obj = uobjs[x[xi, 0]]
            d_obj = uobjs[x[xi, 2]]
            resp = uobjs[x[xi, 3]]
            s_id = meta[x[xi, 4] - 1]["id"]
            workid = sub
            assnid = "MonkeyTurk"

            rec_curr = (s_obj,) + (d_obj,) + (resp,) + (s_id,) + (workid,) + (assnid,)
            trial_records.append(rec_curr)

    col_data_seg["all"] = tb.tabarray(records=trial_records, names=KW_NAMES, formats=KW_FORMATS)
    for sub in subjs:
        t = col_data_seg["all"]["WorkerID"] == sub
        col_data_seg[sub] = col_data_seg["all"][t]
    return col_data_seg
Example #35
def parse_lowest_level():
    files_to_parse = utils.ListUnion([[os.path.join(x,y) for y in os.listdir(x)] for x in os.listdir('.') if x.endswith('DETAILS')])

    kvpairs = []
    for file in files_to_parse:
        print(file)
        Soup = BeautifulSoup.BeautifulSoup(open(file))
        bolds = Soup.findAll('b')
        bolds = [b for b in bolds if utils.Contents(b).endswith(':')]
        newkvpairs = [(utils.Contents(b).strip(': '),utils.Contents(b.findNext()).strip()) for b in bolds][:-1]
        if len(bolds) > 0:
            newkvpairs.append((utils.Contents(bolds[-1]).strip(': '),''.join([utils.Contents(x) if utils.Contents(x) != '' else '\n' for x in bolds[-1].findNext().contents])))
    
        kvpairs.append(newkvpairs)

    tb.tabarray(kvpairs = kvpairs).saveSV('final_results.tsv',metadata=True)
Example #36
def download_images_by_synset(
        synsets,
        seed=None,
        num_per_synset='all',
        firstonly=False,
        path=None,
        imagenet_username='******',
        accesskey='bd662acb4866553500f17babd5992810e0b5a439'):
    """
    Stores num_per_synset randomly chosen images for the synsets specified by synsets,
    from the latest release, under the specified path.
    Since files are stored as tar files online, the entire synset must be downloaded to access random images.

    If 'all' is passed as num_per_synset, all images are stored.

    If the argument firstonly is set to True, download times can be reduced by extracting only the first
    few images.

    Returns a meta tabarray object containing the synset and filename for each downloaded image.
    """
    if path is None:
        path = os.getcwd()
    if not os.path.exists(path):
        os.makedirs(path)
    synsets = list(synsets)
    random.seed(seed)
    kept_names = []
    kept_synset_list = []
    for i, synset in enumerate(synsets):
        synset_names = []
        url = 'http://www.image-net.org/download/synset?' + \
              'wnid=' + str(synset) + \
              '&username=' + imagenet_username + '&accesskey=' + accesskey + \
              '&release=latest'
        print i
        print url
        url_file = urlopen(url)
        tar_file = tarfile.open(fileobj=url_file, mode='r|')
        if firstonly and not (num_per_synset == 'all'):
            keep_idx = xrange(num_per_synset)
            for _ in keep_idx:
                tarinfo = tar_file.next()
                synset_names.append(tarinfo.name)
                tar_file.extract(tarinfo, path)
        else:
            for tarinfo in tar_file:
                synset_names.append(tarinfo.name)
                tar_file.extract(tarinfo, path)
            if num_per_synset == 'all':
                keep_idx = range(len(synset_names))
            else:
                keep_idx = sample(range(len(synset_names)), num_per_synset)
            files_to_remove = frozenset(synset_names) - frozenset(
                [synset_names[idx] for idx in keep_idx])
            for file_to_remove in files_to_remove:
                os.remove(path + '/' + file_to_remove)
        kept_names.extend([synset_names[idx] for idx in keep_idx])
        kept_synset_list.extend([synset] * len(keep_idx))
    meta = tb.tabarray(records=zip(kept_names, kept_synset_list),
                       names=['filename', 'synset'])
    return meta
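# Hedged usage sketch (illustrative only; the wnid, count, and path below are made up):
meta_example = download_images_by_synset(['n02084071'], num_per_synset=5,
                                         firstonly=True, path='./synset_images')
print(meta_example[['filename', 'synset']])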
Example #37
    def get(self):
        meta = tb.tabarray(SVfile=os.path.join(RESULTS_ROOT, 'meta_with_margin_test.tsv'))
        
        N = 3
        days = 4
        text = ''
        NP = 200
        NN = 200
        mdp = meta[:NP]
        mdp = mdp[np.random.RandomState(None).permutation(NP)]['filename']
        mdn = meta[-NN:]
        mdn = mdn[np.random.RandomState(None).permutation(NN)]['filename']

        for d in range(days):
            text += '<div class="entry" id="day_header_%d"><div class="entryheader"><h2 class="entrytitle">Day %d</h2>' % (d, d+1)
            text += '<div class="date">posted by <a href="/vistats_blog">Vistats</a> on 2013.06.19</div></div>'
            ctext = "So, here are my Chanel pics of the day :)"
            text += '<p class="chanel_header" id="chanel_header_day_%d">%s</p><br/>' % (d, ctext)
            chaneltext = '<div class="img_div" id="chanel_img_div_day_%d">' % d + ''.join(['<div class="show_img" id="chanel_img_day_%d_img_%d"><img src="%s/%s"></div>' % (d, _i, IMG_ROOT,x.split('/')[-1]) for _i, x in enumerate(mdp[d*N:(d+1)*N])]) + '</div>'
            text += chaneltext
            notchaneltext = '<div class="img_div" id="not_chanel_img_div_day_%d">' % d + ''.join(['<div class="show_img" id="not_chanel_img_day_%d_img_%d"><img src="%s/%s"></div>' % (d, _i, IMG_ROOT, x.split('/')[-1]) for _i, x in enumerate(mdn[d*N:(d+1)*N])]) + '</div>'
            nctext = "<br/>Hey, and of course I also have a life <b>outside</b> of Chanel :)"
            text += '<p class="not_chanel_header" id="not_chanel_header_day_%d">%s</p><br/>' % (d, nctext) + notchaneltext
            text += '</div>'

        html = HTML_TEMPLATE % text
        
        self.write(html)

        self.finish()
Example #38
def columnwider(narrow_tab): #{{{
    """ Функция для расширения столбцов

    Принимает обычную таблицу, возвращает "раздутую"

    Из-за недоработок класса tabular колонки не могут расширяться динамически,
    поэтому на придется заранее вставить в конец файла поля заведомо
    бОльшей ширины.
    """
    # crate fake row
    first_row = narrow_tab[:1] # возьмем первый ряд для определения типа колонок
    empty_tuple = ()

    for i in first_row.dtype.names:
        if (type(first_row[i][0]).__name__) == 'string_':
            empty_tuple += (column_strut,)
        else:
            empty_tuple +=('',)

    wide_row = tb.tabarray(records=(empty_tuple,), names=list(first_row.dtype.names))

    # now we have table from one empty wide row
    # stack them to input table
    wide_tab = narrow_tab.rowstack([wide_row])

    # for now wide row is unnecessary
    # remove them
    wide_tab = wide_tab[:-1]

    return wide_tab
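# Hedged usage sketch (assumed; in the original module, column_strut is a padding string
# that sets the target column width, so one is defined here purely for illustration):
column_strut = ' ' * 64
narrow = tb.tabarray(records=[('ab', 'cd')], names=['first', 'second'])
wide = columnwider(narrow)
# 'wide' holds the same rows, but its string columns are now as wide as column_strut,
# so longer values can be assigned without being truncated.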
Example #39
def HiAssocWords(page, depends_on=root_wf + 'WordFrequencies.csv'):

    CFreq = PageWordFreqs(page)

    if CFreq != None:

        WFreq = tb.tabarray(SVfile=depends_on, verbosity=0)

        WF = WFreq[tb.fast.isin(
            WFreq['Word'],
            CFreq['Word'],
        )]

        CC = CFreq.join(WF, keycols='Word', Names=['InPage', 'Overall'])

        N = float(CFreq['Frequency'].sum())

        DD = (1 / N) * CC['Frequency_InPage'] - 10**(-9) * CC['Frequency_Overall']
        s = DD.argsort()[::-1]
        DD = DD[s]
        CC = CC[s]
        CC = CC.addcols(DD, names='FrequencyDelta')

        return CC[['Word', 'Frequency_InPage', 'FrequencyDelta']]

    else:
        return None
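# Note: join(..., keycols='Word', Names=['InPage', 'Overall']) suffixes the overlapping
# non-key columns of the two inputs, which is why the frequencies come back as
# CC['Frequency_InPage'] and CC['Frequency_Overall'] above.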
Example #40
def getcategorydata(code,depends_on = os.path.join(DATA_ROOT ,'BLS_Hierarchy','Manifest_1.tsv')):
    
    manifest = depends_on
    
    X = tb.tabarray(SVfile = manifest)
   
    Codes = np.array([x.split('/')[-2] for x in X['URL']])
        
    x = X[Codes == code][0] 
    topic = str(x['Level1'])
    subtopic = str(x['Level2'])
    xx = str(x['Level3'])
    if len(xx.split(':')) > 1 and '-' in xx.split(':')[1]:
        Dataset = xx.split(':')[0].strip()      
        y = ':'.join(xx.split(':')[1:]).strip('() ')
        ProgramName = y.split('-')[0].strip()
        ProgramAbbr = y.split('-')[1]
    elif xx.strip().endswith(')'):
        Dataset = xx[:xx.find('(')].strip()
        ProgramAbbr = xx[xx.find('('):].strip(' ()')
        if not ProgramAbbr.isupper():
            ProgramAbbr = ''
        ProgramName = ''
    else:
        Dataset = xx
        ProgramName = ''
        ProgramAbbr = ''
    
    if code == 'jt':
        ProgramName = 'JOLTS'
        
        
    return {'Topic':topic,'Subtopic':subtopic,'Dataset':Dataset,'ProgramName':ProgramName,'ProgramAbbr':ProgramAbbr,'DatasetCode':code}
Example #41
def LastTimeChanged(path):
    '''
    Returns the last time, according to runtime metadata, that a file (at "path")
    was actually modified (i.e., not simply overwritten, but actually modified).
    '''

    actualmodtime = os.path.getmtime(path)
    if actualmodtime == FindPtime(path):
        # Assumption: metapath mirrors the CreationRecord.csv path used by FindPtime above.
        metapath = metadatapath(path) + '/CreationRecord.csv'
        try:
            Data = tb.tabarray(SVfile=metapath,
                               delimiter=',',
                               lineterminator='\n')
            if len(Data) > 0:
                Data.sort(order=['TimeStamp'])
                Diffs = Data['Diff'].nonzero()[0]
                if len(Diffs) > 0:
                    return Data['TimeStamp'][Diffs[-1]]
                else:
                    return actualmodtime
            else:
                return actualmodtime
        except:
            return actualmodtime
    else:
        return actualmodtime
Example #42
def get_meta(selected_basic_objs=SELECTED_BASIC_OBJS,
             meta_cars=META_CARS, meta_tanks=META_TANKS,
             meta_basic=META_BASIC):
    """Mix the objectome 64 basic-level set and the car/tank subordinate
    level set"""
    assert len(np.unique(selected_basic_objs)) == 22
    si = [i for i, e in enumerate(meta_basic)
          if e['obj'] in selected_basic_objs]
    assert len(si) == 22 * 1000

    meta = meta_basic[si]
    for meta_subord in [meta_cars, meta_tanks]:
        cnames = list(meta_subord.dtype.names)
        assert list(meta_basic.dtype.names) == cnames
        cnames.remove('internal_canonical')
        cnames.remove('texture')        # contains None
        cnames.remove('texture_mode')   # contains None

        meta = tb.tabarray(
            columns=[np.concatenate([meta[e], meta_subord[e]])
                     for e in cnames],
            names=cnames)

    assert len(meta) == (22 + 30 + 30) * 1000
    # 22 basic + 30 cars + 30 tanks
    assert len(np.unique(meta['obj'])) == 22 + 30 + 30
    return meta, meta_basic, meta_cars, meta_tanks
Example #43
 def test_load_save_TSV_skiprows(self):
     fname = TestDataDir + 'test3.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'])
     X2 = tb.tabarray(SVfile=fname, skiprows=1)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #44
 def test_load_save_TSV_nocomments(self):
     fname = TestDataDir + 'test4.tsv'
     self.X.saveSV(fname, printmetadict=False, 
                   metadata=['coloring', 'names'], comments='')
     X2 = tb.tabarray(SVfile=fname, headerlines=2)
     Z = self.X.copy()
     Z.coloring = {}
     self.assert_io(eq(Z, X2), fname)
Example #45
 def test_usecols(self):
     fname = TestDataDir + 'usecols.tsv'
     self.x.saveSV(fname)
     x = tb.tabarray(SVfile=fname, usecols=[0,-1])
     names=[self.x.dtype.names[i] for i in [0,-1]]
     print x,x.dtype.names
     print self.x[names],names
     self.assert_io(eq(x, self.x[names]), fname)
Example #46
 def test_nohash(self):
     fname = TestDataDir + 'nohash.tsv'
     self.x.saveSV(fname, comments='')
     f = open(fname, 'r').read()
     g = open(fname, 'w')
     g.write('this is my file\n' + f)
     g.close()
     x = tb.tabarray(SVfile=fname, headerlines=2)
     self.assert_io(eq(x, self.x), fname)
Example #47
    def test_linefixer(self):
        fname = TestDataDir + 'linefixer.txt'
        X1 = self.X.copy()
        X1.coloring = {}
        X1.saveSV(fname, delimiter='@')   
        X2 = tb.tabarray(SVfile=fname, 
                         linefixer=(lambda x: x.replace('@','\t')))

        self.assert_io(eq(X1, X2), fname)                      
Example #48
 def setUp(self):
     V1 = ['North', 'South', 'East', 'West']
     V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
     Recs = [(a, b, np.random.rand() * 100, np.random.randint(100)) 
             for a in V1 for b in V2]
     self.X = tb.tabarray(records=Recs,         
                        names=['Region', 'Sector', 'Amount', 'Population'], 
                        coloring={'zoo': ['Region','Sector'], 
                                  'york': ['Population','Sector','Region']})
Example #49
 def setUp(self):
     V1 = ['North', 'South', 'East', 'West']
     V2 = ['Service', 'Manufacturing', 'Education', 'Healthcare']
     Recs = [(a, b, np.random.rand() * 100, np.random.randint(100000), 
              np.random.rand(), 'Yes' if np.random.rand() < .5 else 'No') 
             for a in V1 for b in V2]
     self.X = tb.tabarray(records=Recs, names=['Region', 'Sector', 'Amount', 
                                  'Population', 'Importance', 'Modernized'])
     self.keycols = ['Region', 'Sector']
     self.others = [['Amount', 'Population'], ['Importance', 'Modernized']]
Example #50
 def parse_imagenet_meta_data(self, results):
     """
     Parses the meta data from tfrecords into a tabarray
     """
     meta_keys = ["labels"]
     meta = {}
     for k in meta_keys:
         if k not in results:
             raise KeyError('Attribute %s not loaded' % k)
         meta[k] = np.concatenate(results[k], axis=0)
     return tb.tabarray(columns=[list(meta[k]) for k in meta_keys], names = meta_keys)
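# Note: tb.tabarray(columns=..., names=meta_keys) turns each concatenated label vector
# into a named column, so the returned table can be indexed like result['labels'].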