# estimator, if the user is providing a negative graph set, we use # the twoclass esti OO import graphlearn.estimate as estimate if args['negative_input']==None: args['estimator']=estimate.OneClassEstimator(nu=.5, cv=2, n_jobs=-1) else: args['estimator']=estimate.TwoClassEstimator( cv=2, n_jobs=-1) #args for fitting: from eden.converter.graph.gspan import gspan_to_eden from itertools import islice fitargs={ k:args.pop(k) for k in ['lsgg_include_negatives','grammar_n_jobs','grammar_batch_size']} if args['negative_input']!=None: fitargs['negative_input'] = islice(gspan_to_eden(args.pop('negative_input')),args.pop('num_graphs_neg')) else: args.pop('negative_input') args.pop('num_graphs_neg') fitargs['input'] = islice(gspan_to_eden(args.pop('input')),args.pop('num_graphs')) #output OUTFILE=args.pop('output') print "*Sampler init" print "*"*80 print args # CREATE SAMPLER, dumping the rest of the parsed args :) from graphlearn.graphlearn import Sampler
# positive set contains 2401 elements, of which we use 30% to test of we cen improve them , # the rest is used for the oracle lenpo=int(2401*.3) # we select those 30% randomly: splitset= range(2014) random.shuffle(splitset) sample=splitset[:lenpo] oracle=splitset[lenpo:] path='../example/' # we create an oracle estimator=make_estimator(picker(gspan_to_eden(path+'bursi.pos.gspan'),oracle),gspan_to_eden(path+'bursi.neg.gspan')) print 'estimator ok' # ok we create an iterator over the graphs we want to work with... graphs_pos= picker( gspan_to_eden(path+'bursi.pos.gspan') , sample) # save results here: originals=[] improved=[] # we want to use an increasing part of the test set..
# NOTE(review): this chunk begins mid-expression -- the trailing lone `)` below
# closes a call whose opening parenthesis lies outside the visible source
# (presumably a wrapper around sampler.transform; confirm against the full file).
sampler.transform(
    graphs_,
    same_radius=False,
    size_constrained_core_choice=False,
    sampling_interval=9999,
    select_cip_max_tries=100,
    batch_size=30,
    n_steps=100,
    n_jobs=-1,          # -1: presumably "use all cores" -- verify in sampler docs
    improving_threshold=0.9,
)
)

# initializing: lazy iterators over the positive/negative gspan graph files
graphs_pos = gspan_to_eden(path + "bursi.pos.gspan")
graphs_neg = gspan_to_eden(path + "bursi.neg.gspan")
# result accumulators
originals = []
improved = []
# fractions of the training portion to use in each round
percentages = [0.2, 0.4, 0.6, 0.8, 1]
for perc in percentages:
    ######### first we generate all the iterators ###########
    # how many graphs will be used for sampling?
    # NOTE(review): NUMPOS/NUMNEG are defined elsewhere in the file -- assumed
    # to be the sizes of the positive/negative sets; 0.7 keeps 30% held out.
    count_pos = int(NUMPOS * 0.7 * perc)
    count_neg = int(NUMNEG * 0.7 * perc)
    # copy the mega set: tee so the base iterator survives for later rounds
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
    graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)
    # NOTE(review): loop body appears to continue beyond this chunk.
# NOTE(review): the bare `return` below means these first lines are the body of
# a function whose `def` header lies outside the visible source.
sampler = GraphLearnSampler()
# tee: `graphs` is consumed by fit(), `graphs_` is kept for sampling
graphs, graphs_ = itertools.tee(graphs)
sampler.fit(graphs)
# sample improved graphs and unpack the result (unpack defined elsewhere)
return unpack(sampler.sample(graphs_,
                             same_radius=False,
                             max_size_diff=False,
                             sampling_interval=9999,
                             select_cip_max_tries=100,
                             batch_size=30,
                             n_steps=100,
                             n_jobs=-1,          # -1: presumably all cores -- verify
                             improving_threshold=0.9))

# initializing: lazy iterators over the positive/negative gspan graph files
graphs_pos = gspan_to_eden(path + 'bursi.pos.gspan')
graphs_neg = gspan_to_eden(path + 'bursi.neg.gspan')
# result accumulators
originals = []
improved = []
# fractions of the training portion to use in each round
percentages = [.2, .4, .6, .8, 1]
for perc in percentages:
    ######### first we generate all the iterators ###########
    # how many graphs will be used for sampling?
    # NOTE(review): NUMPOS/NUMNEG defined elsewhere -- assumed set sizes;
    # 0.7 keeps 30% held out for evaluation.
    count_pos = int(NUMPOS * .7 * perc)
    count_neg = int(NUMNEG * .7 * perc)
    # copy the mega set: tee so the base iterator survives for later rounds
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
    graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)
    # NOTE(review): loop body appears to continue beyond this chunk.