def mtl_fixer(path, model_id, libpath):
    """Normalize texture references inside an .mtl file in place.

    Rewrites absolute/relative texture paths in the file at ``path`` down to
    bare, lower-cased filenames rooted at ``libpath``, renames the actual
    texture files on disk to their lower-cased names, and drops a ``fixed``
    marker file next to ``path`` so the work is done at most once.

    Parameters
    ----------
    path : str
        Path to the .mtl file to fix.
    model_id : str
        Unused here; kept for interface compatibility with callers.
    libpath : str
        Directory holding the texture files referenced by the .mtl file.
    """
    dirpath, filename = os.path.split(path)
    fixed_path = os.path.join(dirpath, 'fixed')
    if os.path.exists(fixed_path):
        # Marker present: this .mtl was already fixed on a previous run.
        return
    print('fixing mtl path %s' % path)
    with open(path) as fh:
        F = fh.read()
    # Strip directory components from matches of the two "ad" patterns
    # (presumably absolute-directory references — TODO confirm pattern intent).
    for pattern in (ad_pattern, ad_pattern2):
        for d in uniqify([x.group() for x in pattern.finditer(F)]):
            F = F.replace(d, d.split('\\')[-1].split('/')[-1])
    # For each remaining string match, lower-case the filename, point it at
    # libpath, and rename the on-disk file to the lower-cased name if needed.
    for d in uniqify([x.group() for x in STRING_PATTERN.finditer(F)]):
        d = d.strip()
        F = F.replace(d, d.split('/')[-1].strip())
        d = d.split('/')[-1].strip()
        base, ext = os.path.splitext(d)
        base = base.lower()
        ext = ext.lower()
        newpath = os.path.join(libpath, base + ext)
        F = F.replace(d, newpath)
        dirname, fname = os.path.split(newpath)
        # Find the file whose lower-cased name matches; it may currently be
        # stored with different casing on disk.
        candidates = [os.path.join(dirname, entry)
                      for entry in os.listdir(dirname)
                      if entry.lower() == fname]
        op = candidates[0]
        if op != newpath:
            # os.rename instead of `os.system('mv ...')`: safe with spaces
            # and special characters in paths, and portable.
            os.rename(op, newpath)
    # Write the marker first-run sentinel, then the rewritten .mtl content.
    with open(fixed_path, 'w') as fh:
        fh.write('fixed')
    with open(path, 'w') as fh:
        fh.write(F)
def train_test(config_path, outdir, query, ntrain, ntest, ntrain_pos=None,
               classifier=None, classifier_kwargs=None, N=10, universe=None):
    """Run N train/test splits, classify each, and pickle splits + summary stats.

    Parameters
    ----------
    config_path : str
        Path passed through to the splitter.
    outdir : str
        Output directory; ``splits.pickle`` and ``results.pickle`` are written here.
    query : dict or other
        If a dict, uses the single-class splitter/classifier; otherwise the
        multi-class splitter (and ``ova_classify`` unless ``classifier`` is given).
    ntrain, ntest : int
        Training / testing set sizes per split.
    ntrain_pos : int, optional
        Number of positive training examples, forwarded to the splitter.
    classifier : callable, optional
        Classifier ``f(train_features, train_labels, test_features, test_labels,
        **classifier_kwargs)``. NOTE(review): when ``query`` is a dict this is
        overridden with ``classify`` even if explicitly supplied — preserved as-is;
        confirm that is intended.
    classifier_kwargs : dict, optional
        Extra keyword arguments for the classifier. (Default changed from a
        shared mutable ``{}`` to ``None`` — behaviorally equivalent and safe.)
    N : int
        Number of independent splits to run.
    universe : optional
        Forwarded to the splitter.
    """
    if classifier_kwargs is None:
        classifier_kwargs = {}
    MakeDir(outdir)
    if isinstance(query, dict):
        splitter = generate_split
        classifier = classify
    else:
        splitter = generate_multi_split
        if classifier is None:
            classifier = ova_classify
    split_data = []
    results = []
    for i in range(N):
        print(i)
        split = splitter(config_path, 'add_features', query, ntrain, ntest,
                         ntrain_pos=ntrain_pos, universe=universe)
        train_data = split['train_data']
        train_features = split['train_features']
        train_labels = split['train_labels']
        test_data = split['test_data']
        test_features = split['test_features']
        test_labels = split['test_labels']
        # Skip classification when a multi-class classifier is requested but the
        # split produced only two (or fewer) distinct labels.
        if (not classifier_kwargs.get('multi_class')) or len(uniqify(train_labels)) > 2:
            train_filenames = [t['filename'] for t in train_data]
            test_filenames = [t['filename'] for t in test_data]
            split_data.append({'train_filenames': train_filenames,
                               'train_labels': train_labels,
                               'test_filenames': test_filenames,
                               'test_labels': test_labels})
            # Sanity check: train and test sets must not share files.
            assert set(train_filenames).intersection(test_filenames) == set([])
            res = classifier(train_features, train_labels,
                             test_features, test_labels, **classifier_kwargs)
            results.append(res)
    # Average each available statistic over the splits.
    stats = ['test_accuracy', 'ap', 'auc', 'mean_ap', 'mean_auc', 'train_accuracy']
    output = {'split_results': results}
    for stat in stats:
        if stat in results[0] and results[0][stat] is not None:
            output[stat] = sp.array([result[stat] for result in results]).mean()
    # Binary mode for pickle files (correct on all platforms/protocols).
    with open(os.path.join(outdir, 'splits.pickle'), 'wb') as F:
        cPickle.dump(split_data, F)
    with open(os.path.join(outdir, 'results.pickle'), 'wb') as F:
        cPickle.dump(output, F)
def inferSpaceCode(name):
    """Guess a spatial-level code from a column/field name.

    Tokenizes ``name`` (lower-cased) on both underscores and spaces, then
    walks an ordered rule list; the first matching rule wins.  Returns one of
    'm', 's', 'c', 'X', 'f.X', 'f.s', 'f.c', 'f.m', or None if nothing matches.
    """
    lowered = name.lower()
    # Membership is all we need, so collect tokens into a set.
    tokens = set(lowered.split('_')) | set(lowered.split(' '))
    has = tokens.__contains__

    # Ordered rules: (predicate, code). Order matters — e.g. a bare 'state'
    # must be tried before 'state' + 'code'/'fips'.
    rules = [
        (lambda: has('msa') and not has('code'), 'm'),
        (lambda: has('state') and not has('code'), 's'),
        (lambda: has('county') and not has('code'), 'c'),
        (lambda: has('fips') and has('text'), 'X'),
        (lambda: has('area') and not has('code'), 'X'),
        (lambda: has('fips') and not has('state') and not has('county'), 'f.X'),
        (lambda: has('state') and (has('code') or has('fips')), 'f.s'),
        (lambda: has('county') and (has('code') or has('fips')), 'f.c'),
        (lambda: has('msa') and (has('code') or has('fips')), 'f.m'),
    ]
    for predicate, code in rules:
        if predicate():
            return code
    return None
def write_outcerts(func, configs, incertdicts, outcertpaths, db):
    """Write a certificate dict for every output root of ``func``.

    Merges parameter names inherited from input certificates with the new
    parameter names found in ``configs``, prunes stale records from ``db``
    for each output root, and persists one certificate file per
    (outcertpath, outroot) pair.
    """
    # Start from the union of upstream param-name maps, or an empty SON
    # when there are no input certificates.
    if incertdicts:
        inherited = dict_union([cert["param_names"] for cert in incertdicts])
    else:
        inherited = SON([])

    fresh_names = uniqify(ListUnion([cfg.keys() for cfg in configs]))

    for outcertpath, outroot in zip(outcertpaths, func.outroots):
        names = inherited.copy()
        names[outroot] = fresh_names
        # Drop any db records that no longer match this config/run.
        remove_incorrect(db, outroot, func.outconfig_string, func.outrun_hash)
        createCertificateDict(
            outcertpath,
            {
                "run_hash": func.outrun_hash,
                "db": func.dbname,
                "out_args": func.out_args,
                "root": outroot,
                "config_hash": func.outconfig_string,
                "param_names": names,
            },
        )
def ConsolidateSources(metapath, objname=None, extensions=None):
    """Collapse per-extension metadata from CombineSources into one flat dict.

    Parameters
    ----------
    metapath : str
        Path forwarded to ``CombineSources``.
    objname : optional
        Unused here; kept for interface compatibility with callers.
    extensions : list of str, optional
        Metadata source extensions to combine; defaults to
        ['Attached', 'Associated', 'Automatic', 'Inherited'].

    Returns
    -------
    dict
        Consolidated metadata: lists are unioned, text fields are joined,
        and for structural keys the per-extension value is chosen by the
        precedence Automatic > Attached > Associated > Inherited.
    """
    consolidated = {}
    if extensions is None:
        extensions = ['Attached', 'Associated', 'Automatic', 'Inherited']
    combined = CombineSources(metapath, extensions=extensions)

    # List-valued fields: union across extensions.
    if 'Resources' in combined:
        consolidated['Resources'] = uniqify(ListUnion(combined['Resources'].values()))
    if 'image' in combined:
        consolidated['image'] = ListUnion([x.split(',') if is_string_like(x) else x
                                           for x in combined['image'].values()])

    # Free-text fields: join across extensions.
    if 'author' in combined:
        consolidated['author'] = '; '.join(combined['author'].values())
    if 'title' in combined:
        consolidated['title'] = '; '.join(combined['title'].values())

    # Description: prefer 'description', fall back to 'Verbose'; prefix each
    # entry with its extension name when there is more than one source.
    if 'description' in combined:
        descrs = combined['description'].items()
        if len(descrs) == 1:
            consolidated['description'] = descrs[0][1]
        else:
            consolidated['description'] = '\n\n'.join([e + ': ' + d for (e, d) in descrs])
    elif 'Verbose' in combined:
        descrs = combined['Verbose'].items()
        if len(descrs) == 1:
            consolidated['description'] = descrs[0][1]
        else:
            consolidated['description'] = '\n\n'.join([e + ': ' + d for (e, d) in descrs])

    # Keywords: normalize every source to a comma-separated string, then
    # merge, strip, and deduplicate.
    if 'keywords' in combined:
        for k in combined['keywords'].keys():
            if not is_string_like(combined['keywords'][k]):
                combined['keywords'][k] = ','.join(combined['keywords'][k])
        consolidated['keywords'] = ','.join(
            [x.strip() for x in
             uniqify((','.join(combined['keywords'].values())).split(','))])

    # Signature: keep it only if all sources agree on a single value.
    if 'signature' in combined:
        s = uniqify(combined['signature'].values())
        consolidated['signature'] = s[0] if len(s) == 1 else ''

    # Structural keys: take a single value by extension precedence.
    # ('wordassoc' appeared twice in the original list; duplicate removed —
    # the second occurrence had no effect.)
    L = ['nrows', 'ncols', 'coloring', 'wordassoc', 'colformats', 'coltypes',
         'colnames', 'frequentwords', 'nfiles', 'npaths']
    LL = L + [x for x in combined.keys() if x.startswith('DIR_')]
    for x in LL:
        if x in combined and 'Automatic' in combined[x]:
            consolidated[x] = combined[x]['Automatic']
        elif x in combined and 'Attached' in combined[x]:
            # BUG FIX: previously read combined[x]['Automatic'] here, which
            # raised KeyError whenever only 'Attached' was present.
            consolidated[x] = combined[x]['Attached']
        elif x in combined and 'Associated' in combined[x]:
            consolidated[x] = combined[x]['Associated']
        elif x in combined and 'Inherited' in combined[x]:
            consolidated[x] = combined[x]['Inherited']

    # Column descriptions: merge per-column text from all sources.
    if 'coldescrs' in combined:
        coldescrs = {}
        for c in combined['coldescrs'].values():
            if isinstance(c, dict):
                for k in c.keys():
                    if k in coldescrs:
                        coldescrs[k] += (c[k],)
                    else:
                        coldescrs[k] = (c[k],)
        for k in coldescrs.keys():
            coldescrs[k] = '\n'.join(coldescrs[k])
        consolidated['coldescrs'] = coldescrs

    # Anything not handled above: stringify and space-join the values.
    OtherKeys = set(combined.keys()).difference(consolidated.keys())
    for k in OtherKeys:
        consolidated[k] = ' '.join([x if is_string_like(x) else repr(x)
                                    for x in combined[k].values()])
    return consolidated
def validate(idseq):
    """Flatten a sequence of id lists, checking the classes are disjoint.

    Each element of ``idseq`` is a list of id pairs; the pair's second field
    must be globally unique across all elements.  Raises AssertionError with
    'Classes are not disjoint.' otherwise; returns the flattened id list.
    """
    flat_ids = ListUnion(idseq)
    # Renamed from `id` to avoid shadowing the builtin.
    second_fields = [pair[1] for pair in flat_ids]
    expected_total = sum(len(chunk) for chunk in idseq)
    assert len(uniqify(second_fields)) == expected_total, 'Classes are not disjoint.'
    return flat_ids