import os

import numpy as np
import pandas as pd

from data_prep import LoadData, FeatDuct, UndersampleData

# Keyspace name and server address used by this script.
# Alternative keyspaces kept for reference:
#   "ssp_schema_slope0", "sampled_ssp_schema_kgcn"
KEYSPACE = "ssp_2class"
URI = "localhost:48555"

# Existing elements in the graph are those that pre-exist in the graph, and
# should be predicted to continue to exist
PREEXISTS = 0
# Candidates are neither present in the input nor in the solution, they are
# negative samples
CANDIDATE = 1
# Elements to infer are the graph elements whose existence we want to predict
# to be true, they are positive samples
TO_INFER = 2

# Build the data directory with os.path.join instead of the literal '\data\\':
# '\d' is an invalid escape sequence and the backslash only works on Windows.
# The empty last component keeps the trailing separator, because file names
# are appended to this string by plain concatenation below.
datapath = os.path.join(os.getcwd(), 'data', '')
ALLDATA = LoadData(datapath)
ALLDATA = FeatDuct(ALLDATA, Input_Only=True)  # leave only model input
PROCESSED_DATA = pd.read_csv(datapath + "data_complete.csv")

# Categorical Attribute types and the values of their categories
ses = ['Winter', 'Spring', 'Summer', 'Autumn']
locations = []
for ssp in ALLDATA['profile']:
    # First season name contained in the profile string, if any.
    season = next((s for s in ses if s in ssp), None)
    if season is None:
        # Previously False was passed to str.replace, which failed with an
        # unhelpful TypeError; fail with a message naming the bad profile.
        raise ValueError(
            "profile %r does not contain any of the seasons %r" % (ssp, ses))
    # Remove the season name, drop the one character left at the end of the
    # string, and hyphenate the remaining words.
    location = ssp.replace(season, '')[:-1]
    location = location.replace(' ', '-')
    locations.append(location)
# Sorted list of the distinct location names.
loc = np.unique(locations).tolist()
# Categorical Attributes and lists of their values
#config.gpu_options.allow_growth=True
#sess = tf.compat.v1.Session(config=config)

### Test tf for GPU acceleration
# TODO: Issues with GPU acceleration
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# Clear the default graph before building a new one. The compat.v1 alias is
# used because the top-level tf.reset_default_graph is not available in
# TensorFlow 2.x, while tf.compat.v1 (already referenced in the notes above)
# exists in both late 1.x and 2.x releases.
tf.compat.v1.reset_default_graph()

import warnings
from functools import reduce

# Keyspace name and server address used by this script.
KEYSPACE = "ssp_schema_kgcn"
URI = "localhost:48555"

import os
from data_prep import LoadData, FeatDuct

# Build the data directory with os.path.join instead of the literal '\data\\':
# '\d' is an invalid escape sequence and the backslash only works on Windows.
# The empty last component keeps the trailing separator, because the file
# name is appended to this string by plain concatenation below.
path = os.path.join(os.getcwd(), 'data', '')
raw_data = LoadData(path)
data = FeatDuct(raw_data, Input_Only=True)  # leave only model input
data_complete = pd.read_csv(path + "data_complete.csv")

# DATA SELECTION FOR GRAKN TESTING
# Keep three small positional row ranges of the input table (10 + 6 + 6 rows
# when the table is long enough; iloc slices past the end are simply empty).
data = pd.concat(
    [data.iloc[0:10, :], data.iloc[440:446, :], data.iloc[9020:9026, :]])
#data = pd.concat([data.iloc[0:3,:],data.iloc[440:443,:]])
#data = data.iloc[9020:9022,:]

# Existing elements in the graph are those that pre-exist in the graph, and
# should be predicted to continue to exist
PREEXISTS = 0
# Candidates are neither present in the input nor in the solution, they are
# negative samples
CANDIDATE = 1
# Elements to infer are the graph elements whose existence we want to predict
# to be true, they are positive samples
TO_INFER = 2
import seaborn as sns from imblearn.under_sampling import RandomUnderSampler from collections import Counter import os from data_prep import LoadData, FeatDuct, EncodeData, CreateSplits, TrainTestSplit, FeatBathy, FeatSSPvec, FeatSSPId, FeatSSPStat from xgb_mylib import f1_eval_class from data_analysis_lib import PlotCorrelation, ICEPlot """" A PDP is the average of the lines of an ICE plot. Unlike partial dependence plots, ICE curves can uncover heterogeneous relationships. PDPs can obscure a heterogeneous relationship created by interactions. PDPs can show you what the average relationship between a feature and the prediction looks like. This only works well if the interactions between the features for which the PDP is calculated and the other features are weak. In case of interactions, the ICE plot will provide much more insight. """ path = os.getcwd() + '\data\\' rawdata = LoadData(path) data = FeatDuct(rawdata, Input_Only=True) #just to leave only input data data = FeatBathy(data, path) data = FeatSSPId(data, path, src_cond=True) data = FeatSSPStat(data, path) data_enc = EncodeData( data) #data with full features sspid, sspstat, but no ssp-vec data_enc = data_enc.fillna(0) #ICE plot func has problems with NaNs :( target = 'num_rays' features = data_enc.columns.tolist() features.remove(target) seasons = ['Autumn', 'Spring', 'Summer', 'Winter'] locations = [ 'Labrador-Sea', 'Mediterranean-Sea', 'North-Pacific-Ocean',