Ejemplo n.º 1
0
# The positive set contains NUM_POS graphs. 30% of them are set aside to test
# whether we can improve them; the rest is used for the oracle.
NUM_POS = 2401
lenpo = int(NUM_POS * .3)


# Select those 30% at random. The id list has to span the whole positive set:
# it used to be range(2014), so ids 2014..2400 were never assigned to either
# the sample or the oracle. list(...) keeps random.shuffle working when
# range() does not return a list.
splitset = list(range(NUM_POS))
random.shuffle(splitset)
sample = splitset[:lenpo]
oracle = splitset[lenpo:]

path = '../example/'

# Create the oracle from the positive graphs selected by the `oracle` ids
# and the complete negative set.
# NOTE(review): presumably picker() yields the graphs at the given ids --
# confirm against its definition.
estimator = make_estimator(
    picker(gspan_to_eden(path + 'bursi.pos.gspan'), oracle),
    gspan_to_eden(path + 'bursi.neg.gspan'))
print('estimator ok')



# Iterator over the positive graphs we want to work with (the `sample` ids).
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)


# Results are collected here.
originals = []
improved = []



# we want to use an increasing part of the test set..
Ejemplo n.º 2
0
    # How many graphs will be used for sampling? At most 70% of each class,
    # scaled by the current fraction `perc`.
    # NOTE(review): assumes 0 <= perc <= 1; the loop header is not visible here.
    count_pos = int(NUMPOS * 0.7 * perc)
    count_neg = int(NUMNEG * 0.7 * perc)

    # First id position of the held-out test part (the last ~30% of each
    # shuffled id list).
    test_start_pos = int(NUMPOS * 0.7)
    test_start_neg = int(NUMNEG * 0.7)

    # Copy the mega set: one copy is kept for later use, the other two feed
    # the sample set and the test set below.
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
    graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)

    # Create a shuffled list of graph ids. list(...) keeps random.shuffle
    # working when range() does not return a list.
    pos_id = list(range(NUMPOS))
    neg_id = list(range(NUMNEG))
    random.shuffle(pos_id)
    random.shuffle(neg_id)

    # Use the shuffled ids to create the sample set and the test set.
    # The sample set is teed because it is consumed twice: once by sample()
    # and once by the evaluation call.
    pos, pos_ = itertools.tee(picker(graphs_pos_, pos_id[:count_pos]))
    neg, neg_ = itertools.tee(picker(graphs_neg_, neg_id[:count_neg]))
    # The test slice used to be ids[count:int(N * 0.3)], which shrinks as
    # `perc` grows and is empty once count exceeds 30% of N. Use the tail of
    # the shuffled ids instead: it never overlaps the sample prefix (for
    # perc <= 1) and its size does not depend on `perc`.
    # NOTE(review): intended split inferred from the 0.7 / 0.3 factors -- confirm.
    postest = picker(graphs_pos__, pos_id[test_start_pos:])
    negtest = picker(graphs_neg__, neg_id[test_start_neg:])

    ############### then we sample #####################
    improved_neg = sample(neg)
    improved_pos = sample(pos)

    ######### and last we evaluate ###########
    print("evaluating..")
    imp, ori = train_estimator_and_evaluate_testsets(
        pos_, neg_, improved_pos, improved_neg, postest, negtest)
    improved.append(imp)
    originals.append(ori)
    print("done:" + str(perc))
    print("*" * 80)
Ejemplo n.º 3
0
# The positive set contains NUM_POS graphs. 30% of them are set aside to test
# whether we can improve them; the rest is used for the oracle.
NUM_POS = 2401
lenpo = int(NUM_POS * .3)


# Select those 30% at random. The id list has to span the whole positive set:
# it used to be range(2014), so ids 2014..2400 were never assigned to either
# the sample or the oracle. list(...) keeps random.shuffle working when
# range() does not return a list.
splitset = list(range(NUM_POS))
random.shuffle(splitset)
sample = splitset[:lenpo]
oracle = splitset[lenpo:]

path = '../example/'

# Create the oracle from the positive graphs selected by the `oracle` ids
# and the complete negative set.
# NOTE(review): presumably picker() yields the graphs at the given ids --
# confirm against its definition.
estimator = make_estimator(
    picker(gspan_to_eden(path + 'bursi.pos.gspan'), oracle),
    gspan_to_eden(path + 'bursi.neg.gspan'))
print('estimator ok')



# Iterator over the positive graphs we want to work with (the `sample` ids).
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)


# Results are collected here.
originals = []
improved = []



# we want to use an increasing part of the test set..
Ejemplo n.º 4
0
    # How many graphs will be used for sampling? At most 70% of each class,
    # scaled by the current fraction `perc`.
    # NOTE(review): assumes 0 <= perc <= 1; the loop header is not visible here.
    count_pos = int(NUMPOS * .7 * perc)
    count_neg = int(NUMNEG * .7 * perc)

    # First id position of the held-out test part (the last ~30% of each
    # shuffled id list).
    test_start_pos = int(NUMPOS * .7)
    test_start_neg = int(NUMNEG * .7)

    # Copy the mega set: one copy is kept for later use, the other two feed
    # the sample set and the test set below.
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
    graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)

    # Create a shuffled list of graph ids. list(...) keeps random.shuffle
    # working when range() does not return a list.
    pos_id = list(range(NUMPOS))
    neg_id = list(range(NUMNEG))
    random.shuffle(pos_id)
    random.shuffle(neg_id)

    # Use the shuffled ids to create the sample set and the test set.
    # The sample set is teed because it is consumed twice: once by sample()
    # and once by the evaluation call.
    pos, pos_ = itertools.tee(picker(graphs_pos_, pos_id[:count_pos]))
    neg, neg_ = itertools.tee(picker(graphs_neg_, neg_id[:count_neg]))
    # The test slice used to be ids[count:int(N * .3)], which shrinks as
    # `perc` grows and is empty once count exceeds 30% of N. Use the tail of
    # the shuffled ids instead: it never overlaps the sample prefix (for
    # perc <= 1) and its size does not depend on `perc`.
    # NOTE(review): intended split inferred from the .7 / .3 factors -- confirm.
    postest = picker(graphs_pos__, pos_id[test_start_pos:])
    negtest = picker(graphs_neg__, neg_id[test_start_neg:])



    ############### then we sample #####################
    improved_neg = sample(neg)
    improved_pos = sample(pos)


    ######### and last we evaluate ###########
    print('evaluating..')
    imp, ori = train_estimator_and_evaluate_testsets(
        pos_, neg_, improved_pos, improved_neg, postest, negtest)
    improved.append(imp)