Ejemplo n.º 1
0
import os

# Keyspace this script works against.
# Alternative names kept from the original line: "ssp_schema_slope0", "sampled_ssp_schema_kgcn"
KEYSPACE = "ssp_2class"
# host:port string used together with KEYSPACE
URI = "localhost:48555"

# Labels describing the role of each graph element in a training example.
# PREEXISTS: already present in the graph; should be predicted to keep existing.
PREEXISTS = 0
# CANDIDATE: absent from both the input and the solution (negative sample).
CANDIDATE = 1
# TO_INFER: element whose existence should be predicted as true (positive sample).
TO_INFER = 2

from data_prep import LoadData, FeatDuct, UndersampleData
# Data directory below the current working directory. The trailing separator
# of the original value is preserved (empty last component) in case the
# data_prep helpers append file names to it by string concatenation.
# os.path.join replaces the literal '\data\\': its '\d' is an invalid escape
# sequence, and the hard-coded backslashes only form a path on Windows.
datapath = os.path.join(os.getcwd(), 'data', '')
ALLDATA = LoadData(datapath)
ALLDATA = FeatDuct(ALLDATA, Input_Only=True)  # leave only model input
PROCESSED_DATA = pd.read_csv(os.path.join(datapath, "data_complete.csv"))


# Categorical Attribute types and the values of their categories
ses = ['Winter', 'Spring', 'Summer', 'Autumn']

# Derive a location label from every profile name by removing the season it
# contains and hyphenating what is left.
locations = []
for ssp in ALLDATA['profile']:
    # First season (in `ses` order) that occurs anywhere in the profile name.
    season = next((s for s in ses if s in ssp), None)
    if season is None:
        # Previously the fallback value False was handed to str.replace,
        # which failed with an opaque TypeError; report the bad name instead.
        raise ValueError(
            'profile {!r} does not contain any of the seasons {}'.format(ssp, ses))
    # [:-1] drops the last remaining character -- presumably the separator
    # that preceded a trailing season name (TODO confirm the profile format).
    location = ssp.replace(season, '')[:-1]
    location = location.replace(' ', '-')
    locations.append(location)
# Sorted list of the distinct location labels.
loc = np.unique(locations).tolist()

# Categorical Attributes and lists of their values
Ejemplo n.º 2
0
#config.gpu_options.allow_growth=True
#sess = tf.compat.v1.Session(config=config)
### Test tf for GPU acceleration
# TODO: Issues with GPU acceleration
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
# Clear the default graph before anything is built. The top-level
# tf.reset_default_graph alias is not available in TensorFlow 2.x, so the call
# goes through tf.compat.v1, the same namespace the commented-out session
# setup above relies on.
tf.compat.v1.reset_default_graph()
import warnings
from functools import reduce

# Keyspace this script works against.
KEYSPACE = "ssp_schema_kgcn"
# host:port string used together with KEYSPACE
URI = "localhost:48555"

import os
from data_prep import LoadData, FeatDuct
# Data directory below the current working directory. The trailing separator
# of the original value is preserved (empty last component) in case the
# data_prep helpers append file names to it by string concatenation.
# os.path.join replaces the literal '\data\\': its '\d' is an invalid escape
# sequence, and the hard-coded backslashes only form a path on Windows.
path = os.path.join(os.getcwd(), 'data', '')
raw_data = LoadData(path)
data = FeatDuct(raw_data, Input_Only=True)  # leave only model input
data_complete = pd.read_csv(os.path.join(path, "data_complete.csv"))

# DATA SELECTION FOR GRAKN TESTING
# Keep three positional row ranges of the input frame, given as (start, stop)
# bounds, and stack them in that order.
data = pd.concat([
    data.iloc[start:stop, :]
    for start, stop in ((0, 10), (440, 446), (9020, 9026))
])
# Smaller selections left disabled by the author:
#data = pd.concat([data.iloc[0:3,:],data.iloc[440:443,:]])
#data = data.iloc[9020:9022,:]
# Labels describing the role of each graph element in a training example:
#   PREEXISTS - already present in the graph; should be predicted to keep existing
#   CANDIDATE - absent from both the input and the solution (negative sample)
#   TO_INFER  - element whose existence should be predicted as true (positive sample)
PREEXISTS, CANDIDATE, TO_INFER = 0, 1, 2
Ejemplo n.º 3
0
import seaborn as sns
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
import os
from data_prep import LoadData, FeatDuct, EncodeData, CreateSplits, TrainTestSplit, FeatBathy, FeatSSPvec, FeatSSPId, FeatSSPStat
from xgb_mylib import f1_eval_class
from data_analysis_lib import PlotCorrelation, ICEPlot
"""
A partial dependence plot (PDP) is the average of the lines of an individual conditional expectation (ICE) plot.
Unlike PDPs, ICE curves can uncover heterogeneous relationships.
PDPs can obscure a heterogeneous relationship created by interactions.
A PDP shows what the average relationship between a feature and the prediction looks like. This only works well if the interactions between the features for which the PDP is calculated and the other features are weak. When there are interactions, the ICE plot provides much more insight.
"""

# Data directory below the current working directory. The trailing separator
# of the original value is preserved (empty last component) in case the
# data_prep helpers append file names to it by string concatenation.
# os.path.join replaces the literal '\data\\': its '\d' is an invalid escape
# sequence, and the hard-coded backslashes only form a path on Windows.
path = os.path.join(os.getcwd(), 'data', '')
rawdata = LoadData(path)
data = FeatDuct(rawdata, Input_Only=True)  # just to leave only input data
# Feature-building helpers from data_prep, applied one after another.
data = FeatBathy(data, path)
data = FeatSSPId(data, path, src_cond=True)
data = FeatSSPStat(data, path)
# Encoded data with the full feature set (sspid, sspstat) but no ssp-vec.
data_enc = EncodeData(data)

# The ICE plot function has problems with NaNs, so fill them with 0.
data_enc = data_enc.fillna(0)

# Column to predict; every other column of the encoded frame is a feature.
target = 'num_rays'
features = data_enc.columns.tolist()
features.remove(target)
seasons = ['Autumn', 'Spring', 'Summer', 'Winter']
locations = [
    'Labrador-Sea', 'Mediterranean-Sea', 'North-Pacific-Ocean',