# The positive set contains 2401 graphs. 30% of them are used to test whether
# we can improve them; the rest is used for the oracle.
total_pos = 2401
lenpo = int(total_pos * .3)

# We select those 30% randomly.
# FIX: the index pool used to be range(2014) -- a digit transposition of the
# 2401 used for `lenpo` -- so indices 2014..2400 could never be drawn for
# either the sample or the oracle. Both now share `total_pos`.
# list(...) keeps the in-place shuffle working where range() is not a list.
splitset = list(range(total_pos))
random.shuffle(splitset)
sample = splitset[:lenpo]
oracle = splitset[lenpo:]

path = '../example/'

# We create an oracle: an estimator built from the positives selected by the
# `oracle` indices and the full negative file.
# NOTE(review): presumably picker(graphs, ids) yields the graphs at the given
# indices -- confirm against its definition.
estimator = make_estimator(
    picker(gspan_to_eden(path + 'bursi.pos.gspan'), oracle),
    gspan_to_eden(path + 'bursi.neg.gspan'))
print('estimator ok')

# Iterator over the positive graphs we want to work with (the `sample` part).
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)

# Results are saved here.
originals = []
improved = []

# We want to use an increasing part of the test set..
# One sampling/evaluation round for the fraction `perc`.
# NOTE(review): this fragment reads `perc` and reports "done:" + str(perc), so
# it is presumably the body of a loop over fractions whose header is outside
# this view -- re-indent accordingly when splicing it back.

# How many graphs will be used for sampling?
count_pos = int(NUMPOS * 0.7 * perc)
count_neg = int(NUMNEG * 0.7 * perc)

# Size of the held-out test sets (30% of each class).
test_pos = int(NUMPOS * 0.3)
test_neg = int(NUMNEG * 0.3)

# Copy the mega set. tee() takes over the source iterator, so one copy is
# rebound to the original name to keep the full set available afterwards.
graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)

# Create a shuffled list of graph ids.
# list(...) keeps the in-place shuffle working where range() is not a list.
pos_id = list(range(NUMPOS))
neg_id = list(range(NUMNEG))
random.shuffle(pos_id)
random.shuffle(neg_id)

# Use the shuffled list to create the sample set; it is tee'd because it is
# consumed twice (once by sample(), once by the evaluation).
pos, pos_ = itertools.tee(picker(graphs_pos_, pos_id[:count_pos]))
neg, neg_ = itertools.tee(picker(graphs_neg_, neg_id[:count_neg]))

# Test set: the 30% of shuffled ids directly after the sampled ones.
# FIX: the slice used to end at the absolute index int(NUM * 0.3), which made
# the test set empty as soon as count_* reached that index (perc >= ~0.43) and
# shrank it as perc grew. The 30% is now applied as a size, not an end index.
postest = picker(graphs_pos__, pos_id[count_pos:count_pos + test_pos])
negtest = picker(graphs_neg__, neg_id[count_neg:count_neg + test_neg])

############### then we sample #####################
improved_neg = sample(neg)
improved_pos = sample(pos)

######### and last we evaluate ###########
print("evaluating..")
imp, ori = train_estimator_and_evaluate_testsets(
    pos_, neg_, improved_pos, improved_neg, postest, negtest)
improved.append(imp)
originals.append(ori)
print("done:" + str(perc))
print("*" * 80)
# One sampling/evaluation round for the fraction `perc`.
# NOTE(review): `perc` is not defined in this fragment; presumably this is the
# body of a loop over fractions whose header is outside this view -- re-indent
# accordingly when splicing it back.

# How many graphs will be used for sampling?
count_pos = int(NUMPOS * .7 * perc)
count_neg = int(NUMNEG * .7 * perc)

# Size of the held-out test sets (30% of each class).
test_pos = int(NUMPOS * .3)
test_neg = int(NUMNEG * .3)

# Copy the mega set. tee() takes over the source iterator, so one copy is
# rebound to the original name to keep the full set available afterwards.
graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)

# Create a shuffled list of graph ids.
# list(...) keeps the in-place shuffle working where range() is not a list.
pos_id = list(range(NUMPOS))
neg_id = list(range(NUMNEG))
random.shuffle(pos_id)
random.shuffle(neg_id)

# Use the shuffled list to create the sample set; it is tee'd because it is
# consumed twice (once by sample(), once by the evaluation).
pos, pos_ = itertools.tee(picker(graphs_pos_, pos_id[:count_pos]))
neg, neg_ = itertools.tee(picker(graphs_neg_, neg_id[:count_neg]))

# Test set: the 30% of shuffled ids directly after the sampled ones.
# FIX: the slice used to end at the absolute index int(NUM * .3), which made
# the test set empty as soon as count_* reached that index (perc >= ~0.43) and
# shrank it as perc grew. The 30% is now applied as a size, not an end index.
postest = picker(graphs_pos__, pos_id[count_pos:count_pos + test_pos])
negtest = picker(graphs_neg__, neg_id[count_neg:count_neg + test_neg])

############### then we sample #####################
improved_neg = sample(neg)
improved_pos = sample(pos)

######### and last we evaluate ###########
print('evaluating..')
imp, ori = train_estimator_and_evaluate_testsets(
    pos_, neg_, improved_pos, improved_neg, postest, negtest)
improved.append(imp)