def make_task_file2(aid='1834', sizes=None, test=50, repeats=2, params=None, selectsamplers=None):
    """Build sampler tasks (with held-out test graphs) for one assay and dump them.

    We drop lots of these in 1 file:
    task = namedtuple("task", 'samplerid size repeat sampler neg pos')

    Parameters:
        aid: assay id passed to util.getgraphs.
        sizes: training-set sizes to generate tasks for (default [50, 75, 100]).
        test: number of extra graphs per class reserved as test set.
        repeats: how many resamplings per size.
        params, selectsamplers: forwarded to util.get_all_samplers.

    Returns the basename of the dumped task file (dump goes to "natlearn/<name>").
    """
    # Fresh lists per call — mutable default arguments are shared across calls.
    sizes = [50, 75, 100] if sizes is None else sizes
    params = [{}, {}, {}] if params is None else params
    selectsamplers = [0, 1, 2] if selectsamplers is None else selectsamplers

    pos, neg = util.getgraphs(aid)
    tasks = []
    for size in sizes:
        # Draw size+test graphs per class, then split each repeat into
        # train (first `size`) and test (remaining `test`) portions.
        repeatsXposnegsamples = util.sample_pos_neg(pos, neg, size + test, size + test, repeats)
        repeatsXposnegsamples = [
            [pos2[:size], neg2[:size], pos2[size:], neg2[size:]]
            for pos2, neg2 in repeatsXposnegsamples
        ]
        for i, sampler in enumerate(util.get_all_samplers(params=params, select=selectsamplers)):
            for j, (pos_sample, neg_sample, pos_test, neg_test) in enumerate(repeatsXposnegsamples):
                tasks.append(util.task2(i, size, j, sampler,
                                        neg_sample, pos_sample, neg_test, pos_test))
    # Build the name once instead of formatting it twice.
    fname = "t2_%s_%d" % (aid, max(sizes))
    util.dumpfile(tasks, "natlearn/" + fname)
    return fname
def make_task_file(aid='1834', sizes=None, repeats=2, params=None, selectsamplers=None, taskfile_poststring=''):
    """Build sampler tasks (plus per-size estimators) for one assay and dump them.

    We drop lots of these in 1 file:
    task = namedtuple("task", 'samplerid size repeat sampler neg pos')

    Parameters:
        aid: assay id passed to util.getgraphs.
        sizes: training-set sizes to generate tasks for (default [50, 75, 100]).
        repeats: how many resamplings per size.
        params, selectsamplers: forwarded to util.get_all_samplers.
        taskfile_poststring: suffix appended to the dump filename.

    Dumps tasks to "<aid>_<maxsize><post>" and the estimators to "<name>_models";
    returns the task filename.
    """
    # Fresh lists per call — mutable default arguments are shared across calls.
    sizes = [50, 75, 100] if sizes is None else sizes
    params = [{}, {}, {}] if params is None else params
    selectsamplers = [0, 1, 2] if selectsamplers is None else selectsamplers

    pos, neg = util.getgraphs(aid)
    tasks = []
    models = []
    for size in sizes:
        repeatsXposnegsamples, estis = util.sample_pos_neg_ESTI(pos, neg, size, size, repeats)
        models.append(estis)
        for i, sampler in enumerate(util.get_all_samplers(params=params, select=selectsamplers)):
            for j, (pos_sample, neg_sample) in enumerate(repeatsXposnegsamples):
                tasks.append(util.task(i, size, j, sampler, neg_sample, pos_sample))
    fname = "%s_%d%s" % (aid, max(sizes), taskfile_poststring)
    util.dumpfile(tasks, fname)
    util.dumpfile(models, fname + "_models")
    return fname
def run(fname, idd):
    """Score one task: fit a linear model on its train graphs and dump (size, accuracy).

    Loads task number `idd` from `fname`, trains on task.pos/task.neg,
    evaluates on task.postest/task.negtest and writes the result to
    "ASD/d_<fname>_<idd>".
    """
    task = util.loadfile(fname)[idd]
    estimator = util.graphs_to_linmodel(task.pos, task.neg)
    features, labels = util.graphs_to_Xy(task.postest, task.negtest)
    predictions = estimator.predict(features)
    accuracy = sklearn.metrics.accuracy_score(labels, predictions)
    util.dumpfile((task.size, accuracy), "ASD/d_%s_%d" % (fname, idd))
def make_task(aid, sizes=None, test=100, repeats=3, params=None, postname=''):
    """Build cascade-abstraction tasks (train + held-out test split) and dump them.

    Parameters:
        aid: assay id passed to util.getgraphs.
        sizes: training-set sizes (default [50, 100, 150]).
        test: extra graphs per class kept as test set.
        repeats: resamplings per size.
        params: kwargs forwarded to util.get_casc_abstr.
        postname: suffix for the "stack_task" dump filename.
    """
    # Fresh defaults per call — mutable default arguments are shared across calls.
    sizes = [50, 100, 150] if sizes is None else sizes
    params = {} if params is None else params

    pos, neg = util.getgraphs(aid)
    tasks = []
    for size in sizes:
        getsize = size + test
        for i, (p, n) in enumerate(util.sample_pos_neg(pos, neg, getsize, getsize, repeats)):
            sampler = util.get_casc_abstr(kwargs=params)
            # NOTE(review): samplerid is hard-coded to 1 here (other builders use
            # the sampler index) — confirm this is intentional.
            tasks.append(util.task2(1, size, i, sampler,
                                    n[:size], p[:size], n[size:], p[size:]))
    util.dumpfile(tasks, "stack_task" + postname)
def run(filename, taskid):
    """Run util.sample on task `taskid` from `filename`; dump the result.

    On failure the task is skipped (returns None) — but we now show the
    exception instead of silently discarding it, which previously made
    failures impossible to diagnose (the traceback print was commented out).
    """
    tasks = util.loadfile(filename)
    try:
        result = util.sample(tasks[taskid])
    except Exception as exc:
        # Best-effort: report and skip this task rather than aborting the batch.
        print("molelearnedlayer except")
        print(exc)
        return None
    util.dumpfile(result, getresfilename(filename, taskid))
def run(filename, taskid):
    """Run util.sample on one natlearn task and dump the result.

    Loads "natlearn/<filename>", picks the task via gettask, samples it, and
    writes the output to "natlearn/res_<filename>_<taskid>". On failure the
    offending task object and a truncated traceback are printed and None is
    returned so the batch can continue.
    """
    tasks = util.loadfile("natlearn/" + filename)
    task = gettask(tasks, taskid)
    try:
        result = util.sample(task)
    except Exception as err:
        print("naturallearning is showing the task object:")
        print(task)
        import traceback
        print(traceback.format_exc(20))
        return None
    util.dumpfile(result, "natlearn/res_%s_%d" % (filename, taskid))
def evalandshow(fname, tasknum, sizes, show=False):
    """Read results for `fname`, score them against the stored models, and plot.

    Dumps the processed scores to "ASASD", then renders a score plot and a
    time plot (to "<fname>score.png" / "<fname>time.png"), optionally showing
    them interactively.
    """
    results = readresults(fname, tasknum)
    # NOTE(review): `eval` here presumably resolves to a module-level helper
    # elsewhere in this file, shadowing the builtin — verify and consider renaming.
    processed = eval(results, util.loadfile(fname + "_models"), sizes)
    util.dumpfile((processed, fname, show), "ASASD")
    draw2(processed, fname + "score.png", show=show)
    draw(processed, fname + "time.png",
         get_mean=lambda r: r.time_mean,
         get_var=lambda r: r.time_var,
         show=show)
def run(fname, idd):
    """Evaluate one task with fully-stacked graphs, twice, and dump accuracies.

    The original body was the same code copy-pasted twice, differing only in
    `remove_intermediary_layers` and the output filename — factored into
    _stacked_accuracy. Results go to "stacked/<fname>_<idd>" (layers kept)
    and "stacked/2_<fname>_<idd>" (intermediary layers removed).
    """
    task = util.loadfile(fname)[idd]
    acc = _stacked_accuracy(task, remove_intermediary_layers=False)
    util.dumpfile((task.size, acc), "stacked/%s_%d" % (fname, idd))

    # Reload a fresh task: fit_transform fits the graph_transformer in place,
    # so the second variant must start from the pickled, unfitted state
    # (the original code also reloaded here).
    task = util.loadfile(fname)[idd]
    acc = _stacked_accuracy(task, remove_intermediary_layers=True)
    util.dumpfile((task.size, acc), "stacked/2_%s_%d" % (fname, idd))


def _stacked_accuracy(task, remove_intermediary_layers):
    """Fit a linear model on fully-stacked train graphs and return test accuracy.

    Assumes fit_transform yields the positive graphs first (task.size of them)
    followed by the negatives, matching the slicing in the original code —
    TODO confirm against util.
    """
    def to_graph(data):
        return task.sampler.decomposer.make_new_decomposer(data).pre_vectorizer_graph()

    train = [to_graph(d) for d in task.sampler.graph_transformer.fit_transform(
        task.pos, task.neg, remove_intermediary_layers=remove_intermediary_layers)]
    esti = util.graphs_to_linmodel(train[:task.size], train[task.size:])

    test = [to_graph(d) for d in task.sampler.graph_transformer.transform(
        task.postest + task.negtest, remove_intermediary_layers=remove_intermediary_layers)]
    X, y = util.graphs_to_Xy(test[:len(task.postest)], test[len(task.postest):])
    ypred = esti.predict(X)
    return sklearn.metrics.accuracy_score(y, ypred)