def mtl_fixer(path,model_id,libpath):
    """Rewrite file references inside the file at ``path`` to point into ``libpath``.

    The work is done at most once per directory: a marker file named
    ``fixed`` next to ``path`` records that the rewrite already happened,
    and the function returns immediately when that marker exists.

    Steps performed on the file contents:

    1. Every match of ``ad_pattern`` and then ``ad_pattern2`` is replaced by
       its last path component (text after the final backslash and the
       final forward slash).
    2. Every match of ``STRING_PATTERN`` is reduced to its last
       ``/``-separated component, lower-cased, and replaced by the join of
       ``libpath`` and that lower-cased name.
    3. The file in ``libpath`` whose name matches case-insensitively is
       renamed to the lower-cased name if it differs.

    Parameters:
        path: path of the file to rewrite in place (presumably a Wavefront
            ``.mtl`` file, judging by the function name -- verify).
        model_id: unused by this function; kept for interface compatibility.
        libpath: directory holding the referenced files.

    Raises:
        IndexError: if a referenced file has no case-insensitive match in
            ``libpath``.
    """
    dirpath = os.path.dirname(path)
    fixed_path = os.path.join(dirpath,'fixed')
    if os.path.exists(fixed_path):
        return

    print('fixing mtl path %s' % path)
    with open(path) as source:
        F = source.read()

    # The second pattern is matched against the text produced by the first
    # pass, so the two passes must run sequentially.
    for pattern in (ad_pattern, ad_pattern2):
        for found in uniqify([m.group() for m in pattern.finditer(F)]):
            F = F.replace(found,found.split('\\')[-1].split('/')[-1])

    for found in uniqify([m.group() for m in STRING_PATTERN.finditer(F)]):
        found = found.strip()
        short_name = found.split('/')[-1].strip()
        F = F.replace(found,short_name)

        [base,ext] = os.path.splitext(short_name)
        newpath = os.path.join(libpath,base.lower() + ext.lower())
        F = F.replace(short_name,newpath)

        [lib_dir,lowered_name] = os.path.split(newpath)
        # Locate the on-disk file regardless of its current letter case.
        existing = [os.path.join(lib_dir,entry) for entry in os.listdir(lib_dir)
                    if entry.lower() == lowered_name][0]
        if existing != newpath:
            # os.rename instead of a shell "mv" string: safe for paths that
            # contain spaces or shell metacharacters.
            os.rename(existing,newpath)

    # Write the rewritten file first; only mark the directory as fixed once
    # the rewrite has actually succeeded.
    with open(path,'w') as target:
        target.write(F)
    with open(fixed_path,'w') as marker:
        marker.write('fixed')
예제 #2
0
파일: ecc_main.py 프로젝트: yamins81/ecc
def train_test(config_path,outdir,query,ntrain,ntest,ntrain_pos = None,classifier = None,classifier_kwargs = None,N=10,universe=None):
    """Run ``N`` train/test splits, classify each one, and pickle the outcome.

    For each of the ``N`` rounds a split is generated, the classifier is run
    on it, and the per-split result is collected.  Two files are written
    into ``outdir``:

    * ``splits.pickle``  -- list of dicts with the train/test filenames and
      labels of every split that was classified.
    * ``results.pickle`` -- dict with ``'split_results'`` (the list of
      per-split results) plus, for each of the statistics ``test_accuracy``,
      ``ap``, ``auc``, ``mean_ap``, ``mean_auc`` and ``train_accuracy`` that
      is present and not ``None`` in the first result, its mean over all
      results.

    Parameters:
        config_path, query, ntrain, ntest, ntrain_pos, universe: forwarded to
            the splitter (``generate_split`` when ``query`` is a dict,
            otherwise ``generate_multi_split``).
        outdir: output directory; passed to ``MakeDir`` before use.
        classifier: callable used in the multi-split case; defaults to
            ``ova_classify``.
        classifier_kwargs: extra keyword arguments for the classifier.  When
            it has a truthy ``'multi_class'`` entry, splits whose training
            labels contain two or fewer distinct values are skipped.
        N: number of splits to attempt.

    Returns:
        None.  Results are only written to disk.
    """
    # Avoid sharing one mutable default dict across calls.
    if classifier_kwargs is None:
        classifier_kwargs = {}

    MakeDir(outdir)

    if isinstance(query,dict):
        splitter = generate_split
        # NOTE(review): a caller-supplied ``classifier`` is ignored in this
        # branch (original behaviour, preserved) -- confirm this is intended.
        classifier = classify
    else:
        splitter = generate_multi_split
        if classifier is None:
            classifier = ova_classify

    split_data = []
    results = []

    for i in range(N):
        print(i)
        split = splitter(config_path,'add_features',query,ntrain,ntest,ntrain_pos = ntrain_pos, universe=universe)
        train_data = split['train_data']
        train_features = split['train_features']
        train_labels = split['train_labels']
        test_data = split['test_data']
        test_features = split['test_features']
        test_labels = split['test_labels']

        # In multi-class mode a split needs more than two distinct labels.
        if classifier_kwargs.get('multi_class') and len(uniqify(train_labels)) <= 2:
            continue

        train_filenames = [t['filename'] for t in train_data]
        test_filenames = [t['filename'] for t in test_data]
        split_data.append({'train_filenames':train_filenames,'train_labels':train_labels,
                           'test_filenames': test_filenames,'test_labels':test_labels})

        # Train and test sets must not share any file.
        assert set(train_filenames).intersection(test_filenames) == set([])
        res = classifier(train_features,train_labels,test_features,test_labels,**classifier_kwargs)
        results.append(res)

    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']

    output = {'split_results' : results}

    # Guard against every split having been skipped (or N == 0): there is
    # then no first result from which to decide which stats are available.
    if results:
        first = results[0]
        for stat in stats:
            if stat in first and first[stat] is not None:
                output[stat] = sp.array([result[stat] for result in results]).mean()

    # Pickle streams are binary data, so the files are opened in 'wb' mode.
    with open(os.path.join(outdir,'splits.pickle'),'wb') as F:
        cPickle.dump(split_data,F)
    with open(os.path.join(outdir,'results.pickle'),'wb') as F:
        cPickle.dump(output,F)
    


    
예제 #3
0
def inferSpaceCode(name):
    """Guess a short space-code string from the words in a column name.

    ``name`` is lower-cased and split on both underscores and spaces; the
    resulting words decide the code:

    * ``'m'`` / ``'s'`` / ``'c'``  -- 'msa' / 'state' / 'county' without 'code'
    * ``'X'``                      -- 'fips' with 'text', or 'area' without 'code'
    * ``'f.X'``                    -- 'fips' without 'state' and 'county'
    * ``'f.s'`` / ``'f.c'`` / ``'f.m'`` -- 'state' / 'county' / 'msa' together
      with 'code' or 'fips'

    Rules are tried in the order listed; the first match wins.  Returns
    ``None`` when no rule applies.
    """
    lowered = name.lower()
    parts = uniqify(lowered.split('_') + lowered.split(' '))

    has_code = 'code' in parts
    has_fips = 'fips' in parts

    # Plain geographic names (no 'code' word present).
    if not has_code:
        for word, plain_code in (('msa', 'm'), ('state', 's'), ('county', 'c')):
            if word in parts:
                return plain_code

    if has_fips and 'text' in parts:
        return 'X'
    if 'area' in parts and not has_code:
        return 'X'
    if has_fips and 'state' not in parts and 'county' not in parts:
        return 'f.X'

    # Coded variants: the geographic word accompanied by 'code' or 'fips'.
    if has_code or has_fips:
        for word, coded in (('state', 'f.s'), ('county', 'f.c'), ('msa', 'f.m')):
            if word in parts:
                return coded

    return None
예제 #4
0
def write_outcerts(func, configs, incertdicts, outcertpaths, db):
    """Create one certificate dict per output root of ``func``.

    The ``param_names`` mapping stored in each certificate starts from the
    union of the ``param_names`` of all incoming certificates (or an empty
    ``SON`` when there are none) and gains one entry for the output root,
    holding the de-duplicated keys of all ``configs``.

    Before each certificate is created, ``remove_incorrect`` is called for
    that root with the function's config string and run hash.

    Parameters:
        func: object providing ``outroots``, ``outconfig_string``,
            ``outrun_hash``, ``dbname`` and ``out_args``.
        configs: iterable of mappings whose keys are the new parameter names.
        incertdicts: incoming certificate dicts, each with a
            ``"param_names"`` entry; may be empty.
        outcertpaths: certificate paths, paired positionally with
            ``func.outroots``.
        db: passed through to ``remove_incorrect``.
    """
    if not incertdicts:
        inherited_names = SON([])
    else:
        inherited_names = dict_union([cert["param_names"] for cert in incertdicts])

    config_keys = uniqify(ListUnion([cfg.keys() for cfg in configs]))

    for cert_path, root in zip(outcertpaths, func.outroots):
        # Each root gets its own copy so entries do not leak between roots.
        names_for_root = inherited_names.copy()
        names_for_root[root] = config_keys

        remove_incorrect(db, root, func.outconfig_string, func.outrun_hash)

        certificate = {
            "run_hash": func.outrun_hash,
            "db": func.dbname,
            "out_args": func.out_args,
            "root": root,
            "config_hash": func.outconfig_string,
            "param_names": names_for_root,
        }
        createCertificateDict(cert_path, certificate)
def ConsolidateSources(metapath,objname=None,extensions=None):
    """Collapse per-source metadata for ``metapath`` into one flat dict.

    ``CombineSources`` is expected to return a mapping of
    ``attribute -> {source_name: value}``; this function merges the values
    of each attribute across sources using attribute-specific rules:

    * ``Resources``: union of all values, de-duplicated.
    * ``image``: union of all values; string values are split on commas.
    * ``author`` / ``title``: values joined with ``'; '``.
    * ``description``: taken from ``description`` if present, otherwise from
      ``Verbose``.  A single value is used as is; several values are joined
      as ``"<source>: <text>"`` paragraphs.
    * ``keywords``: all keywords flattened, stripped, de-duplicated and
      joined with commas.
    * ``signature``: the common value if all sources agree, else ``''``.
    * table statistics (``nrows``, ``ncols``, ... and any ``DIR_*`` key):
      the value of the first available source in the order Automatic,
      Attached, Associated, Inherited.
    * ``coldescrs``: per-column descriptions joined with newlines.
    * anything else not already consolidated: values joined with spaces
      (non-strings via ``repr``).

    Parameters:
        metapath: forwarded to ``CombineSources``.
        objname: unused by this function; kept for interface compatibility.
        extensions: source names forwarded to ``CombineSources``; defaults to
            Attached, Associated, Automatic, Inherited.

    Returns:
        dict mapping attribute name to its consolidated value.
    """
    # Sources in decreasing priority for single-valued attributes.
    source_priority = ('Automatic','Attached','Associated','Inherited')

    consolidated = {}
    if extensions is None:
        extensions = ['Attached','Associated','Automatic','Inherited']
    combined = CombineSources(metapath,extensions=extensions)

    if 'Resources' in combined:
        consolidated['Resources'] = uniqify(ListUnion(list(combined['Resources'].values())))

    if 'image' in combined:
        consolidated['image'] = ListUnion([x.split(',') if is_string_like(x) else x for x in combined['image'].values()])

    for key in ('author','title'):
        if key in combined:
            consolidated[key] = '; '.join(combined[key].values())

    # 'description' wins over 'Verbose'; only the first one present is used.
    for key in ('description','Verbose'):
        if key in combined:
            # list() so indexing also works where items() returns a view.
            descrs = list(combined[key].items())
            if len(descrs) == 1:
                consolidated['description'] = descrs[0][1]
            else:
                consolidated['description'] = '\n\n'.join([e + ': ' + d for (e,d) in descrs])
            break

    if 'keywords' in combined:
        # Normalise every source's keywords to one comma-separated string
        # without modifying the mapping returned by CombineSources.
        keyword_strings = [v if is_string_like(v) else ','.join(v)
                           for v in combined['keywords'].values()]
        # Strip before de-duplicating so 'a' and ' a' collapse to one entry.
        stripped = [kw.strip() for kw in ','.join(keyword_strings).split(',')]
        consolidated['keywords'] = ','.join(uniqify(stripped))

    if 'signature' in combined:
        s = uniqify(list(combined['signature'].values()))
        if len(s) == 1:
            consolidated['signature'] = s[0]
        else:
            consolidated['signature'] = ''

    L = ['nrows','ncols','coloring','wordassoc','colformats','coltypes','colnames','frequentwords','nfiles','npaths']
    LL = L + [x for x in combined.keys() if x.startswith('DIR_')]
    for x in LL:
        if x not in combined:
            continue
        for source in source_priority:
            if source in combined[x]:
                # Read the value from the source that was actually found.
                # The original read 'Automatic' in the 'Attached' branch,
                # which raised KeyError.
                consolidated[x] = combined[x][source]
                break

    if 'coldescrs' in combined:
        collected = {}
        for c in combined['coldescrs'].values():
            if isinstance(c,dict):
                for (k,text) in c.items():
                    collected.setdefault(k,[]).append(text)
        consolidated['coldescrs'] = dict((k,'\n'.join(texts)) for (k,texts) in collected.items())

    OtherKeys = set(combined.keys()).difference(consolidated.keys())

    for k in OtherKeys:
        consolidated[k] = ' '.join([x if is_string_like(x) else repr(x) for x in combined[k].values()])

    return consolidated
예제 #6
0
파일: splits.py 프로젝트: yamins81/ecc
def validate(idseq):
    """Flatten ``idseq`` and check that its groups do not overlap.

    ``idseq`` is a sequence of groups of entries.  The groups are merged
    with ``ListUnion``; the element at index 1 of every merged entry is then
    required to be unique across all groups.

    Returns:
        The merged list of entries.

    Raises:
        AssertionError: if the number of distinct index-1 elements differs
            from the total number of entries in ``idseq``.
    """
    ids = ListUnion(idseq)
    second_fields = [entry[1] for entry in ids]
    distinct_count = len(uniqify(second_fields))
    expected_count = sum(len(group) for group in idseq)
    assert distinct_count == expected_count, 'Classes are not disjoint.'
    return ids