Example 1
def load_test(tf, filename_test, preprocess=None):
    # Build the test data.
    print("Loading test data...")

    X, y = np.load(filename_test)
    X = np.array(X).astype(dict)
    y = np.array(y).astype(int)

    print("\tfilename = " + filename_test)
    print("\tX size = " + str(len(X)))
    print("\ty size = " + str(len(y)))

    # Preprocessing.
    print("Preprocessing...")
    X = multithreadmap(rewrite_content, X)

    if preprocess:
        X = multithreadmap(preprocess, X)

    X = multithreadmap(permute_by_pt, X)
    X = multithreadmap(extract, X)

    # Apply the fitted transformer (see load_tf below) to the jet contents.
    X = multithreadmap(tftransform, X, tf=tf)

    # Drop degenerate jets whose tree is a single node.
    i = 0
    while i < len(y):
        if X[i]['tree'].shape == (1, 2):
            X, y = np.delete(X, i), np.delete(y, i)
        else:
            i += 1

    return X, y
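
All of these examples rely on a multithreadmap helper that is not shown in the excerpts. Below is a minimal, purely illustrative sketch of what such a helper might look like, assuming it maps a function over a sequence in a thread pool and forwards extra keyword arguments to every call; the real helper in the source project may differ.

from functools import partial
from multiprocessing.pool import ThreadPool

def multithreadmap(f, seq, n_threads=4, **kwargs):
    # Hypothetical sketch: map f over seq in a thread pool, passing the
    # extra keyword arguments through to each call, and return a list.
    with ThreadPool(n_threads) as pool:
        return pool.map(partial(f, **kwargs), seq)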
Example 2
def load_tf(filename_train, preprocess=None, n_events_train=-1):
    # Build the training data and fit the feature transformer.
    print("Loading training data...")

    X, y = np.load(filename_train)
    X = np.array(X).astype(dict)
    y = np.array(y).astype(int)

    # Optionally keep a random subset of the training events.
    if n_events_train > 0:
        indices = np.random.permutation(len(X))[:n_events_train]
        X = X[indices]
        y = y[indices]

    print("\tfilename = " + filename_train)
    print("\tX size = " + str(len(X)))
    print("\ty size = " + str(len(y)))

    # Preprocessing.
    print("Preprocessing...")
    X = multithreadmap(rewrite_content, X)

    if preprocess:
        X = multithreadmap(preprocess, X)

    X = multithreadmap(permute_by_pt, multithreadmap(extract, X))
    Xcontent = multithreadmap(extractcontent, X)

    # Fit a robust scaler on the stacked jet contents.
    tf = RobustScaler().fit(np.vstack(Xcontent))

    return tf
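
A possible way to combine the two loaders above, with placeholder file names: fit the scaler once on the training file, then reuse it when loading the test file.

# Hypothetical usage; the .npy paths and the event count are placeholders.
tf = load_tf("train_anti-kt.npy", n_events_train=100000)
X_test, y_test = load_test(tf, "test_anti-kt.npy")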
Example 3
def cleanarray(jets_array, addID=False):
    # Index of the first non-particle entry in each jet.
    indexes = multithreadmap(find_first_non_particle, jets_array)

    jets_array = list(jets_array)

    # Truncate every jet at its first non-particle entry.
    for i in range(len(jets_array)):
        jets_array[i] = jets_array[i][:indexes[i]]

    # Keep only the selected particle features (optionally with the ID).
    jets_array = multithreadmap(select_particle_features,
                                jets_array,
                                addID=addID)

    return jets_array
Example 4
def preprocess_for_training(filename, regression=False, R_clustering=0.3,
                            issignal=True, tosavefilename=''):
    # Cluster the events into jets and save one .npy file per tree topology.
    events = np.array(np.load(filename))
    signal = multithreadmap(create_jet_dictionary, events, cluster=cluster,
                            regression=regression, R=1000.)

    X = np.array(signal)
    if regression:
        # Regression target: the generated transverse momentum of each jet.
        y = np.array(multithreadmap(extract_component, X, component='genpt'))
    else:
        if issignal:
            y = np.ones(len(X), dtype=int)
        else:
            y = np.zeros(len(X), dtype=int)

    print('### kt ###')
    X_ = np.copy(X)
    X_ = multithreadmap(preprocess, X_, output='kt', regression=regression,
                        cluster=cluster, R_clustering=R_clustering)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "kt.npy", np.array([X_, y]))

    print('### cambridge ###')
    X_ = np.copy(X)
    X_ = multithreadmap(preprocess, X_, output='cambridge', regression=regression,
                        cluster=cluster, R_clustering=R_clustering)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "cambridge.npy", np.array([X_, y]))

    # The anti-kt trees are also the starting point for the random and
    # pt-ordered variants below.
    X = multithreadmap(preprocess, X, output="anti-kt", regression=regression,
                       cluster=cluster, R_clustering=R_clustering)

    print('### anti-kt ###')
    X_ = np.copy(X)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "anti-kt.npy", np.array([X_, y]))

    print('### random tree ###')
    X_ = np.copy(X)
    X_ = multithreadmap(randomize, X_)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "random.npy", np.array([X_, y]))

    print('### seq by pt ###')
    X_ = np.copy(X)
    X_ = multithreadmap(sequentialize_by_pt, X_, reverse=False)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "seqpt.npy", np.array([X_, y]))

    print('### seq by pt reversed ###')
    X_ = np.copy(X)
    X_ = multithreadmap(sequentialize_by_pt, X_, reverse=True)
    X_ = multithreadmap(rewrite_content, X_)
    X_ = multithreadmap(permute_by_pt, X_)
    X_ = multithreadmap(extract, X_)
    np.save(tosavefilename + "seqpt_reversed.npy", np.array([X_, y]))
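
A possible call, with placeholder paths: cluster one signal sample and write one .npy file per topology (kt, cambridge, anti-kt, random tree, seq by pt, seq by pt reversed) under the given output prefix.

# Hypothetical usage; the input file and output prefix are placeholders.
preprocess_for_training("signal_events.npy",
                        regression=False,
                        R_clustering=0.3,
                        issignal=True,
                        tosavefilename="npyfiles/signal_")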
Example 5
                                      #'QCD_Pt120to170',
                                      #'QCD_Pt50to80',
                                      #'QCD_Pt170to300_ext',
                                      #'QCD_Pt120to170_ext']

#def app(txt):
#    return('/'+txt+'_dataformat.npy')

#signallist = multithreadmap(app,signallist)
#backgroundlist = multithreadmap(app,backgroundlist)

background = []

for path_file in backgroundlist:
    events = np.array(np.load(basepath + path_file))
    background = background + multithreadmap(ff, events, cluster=cluster, R=1.0)

signal = []

for path_file in signallist:
    events = np.array(np.load(basepath + path_file))
    signal = signal + multithreadmap(ff, events, cluster=cluster, R=1.0)

# In[]:
### creating files to be preprocessed ###
nmax = min(len(signal), len(background))
if nmax % 2 == 1:
    nmax -= 1

X = np.array(background[:nmax] + signal[:nmax])
y = np.array([0] * nmax + [1] * nmax)
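
A possible follow-up, not part of the excerpt: shuffle the balanced arrays together so that background (label 0) and signal (label 1) events are interleaved before they are written out.

# Hypothetical continuation: apply one common permutation to X and y.
perm = np.random.permutation(2 * nmax)
X, y = X[perm], y[perm]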
Example 6
        content = np.array(content).reshape(-1, 5)
        jets.append((tree, content, mass, pt))
        
    return jets

# In[]:
    

for t in ['train', 'test']:
    ### Loading and "jetting" data with ff ###
    signallist = ['/Background_JEC_' + t + '_ID.npy']
    signal = []

    for path_file in signallist:
        events = np.array(np.load(basepath + path_file))
        signal = signal + multithreadmap(ff, events, cluster=cluster,
                                         regression=True, R=1000.)
    ## In[]:
    ### creating files to be preprocessed ###
    print(len(signal))
    X = np.array(signal)
    y = np.array(multithreadmap(extract_component, X, component='genpt'))

    for R_clustering, f in [(0.3, basepath + '/npyfilesregression/subjet_oriented_'),
                            (0.000001, basepath + '/npyfilesregression/particle_oriented_')]:
        if t == 'train':
            ## In[]:
            ### eliminate single particles ###
            i = 0
            while i < len(y):
                if X[i]['tree'].shape == (1, 2):
                    X, y = np.delete(X, i), np.delete(y, i)
                else:
                    i += 1