def __init__(self, dev_name, model, is_model_pkl=False, use_cols=None, y_col=None, train=None, is_train_csv=False):
    """Initialize a per-device classifier.

    Parameters
    ----------
    dev_name : identifier for the device this classifier belongs to.
    model : a model object, or a path to a pickled model when
        ``is_model_pkl`` is True (loaded via ``self.load_model_from_pkl``).
    is_model_pkl : treat ``model`` as a pickle path when True.
    use_cols : optional column selection; defaults to the contents of
        ``data/use_cols.csv``.
    y_col : label column name; defaults to ``'device_category'``.
    train : optional training data — a DataFrame, or a CSV path when
        ``is_train_csv`` is True. When given, ``self.train(train)`` is called.
    is_train_csv : treat ``train`` as a CSV path when True.
    """
    self.dev_name = dev_name
    if is_model_pkl:
        self.load_model_from_pkl(model)
    else:
        self.model = model
    # Fall back to the project-wide default column list when none is given.
    if use_cols is not None:
        self.use_cols = use_cols
    else:
        self.use_cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
    self.y_col = y_col if y_col else 'device_category'
    # BUG FIX: the original tested `if train:`, which raises ValueError when
    # `train` is a pandas DataFrame (ambiguous truth value). Compare against
    # None instead.
    if train is not None:
        if is_train_csv:
            train = utils.load_data_from_csv(train, use_cols)
        elif use_cols is not None:
            # BUG FIX: `elif use_cols:` likewise breaks for a pandas
            # Index/Series column selection; an explicit None test is safe.
            train = train[use_cols]
        self.train(train)
def get_node2vec_data(self):
    """Build page-embedding (node2vec) features.

    Work in progress (#Harman): the relation data is loaded but the
    embedding itself is not implemented yet, so an empty placeholder
    string is returned.
    """
    relation_df, output_df = utils.load_data_from_csv(dtype="relation")
    node2vec_df = ""
    return node2vec_df
def run_multi_dev_experiments(datasets, dev_model_csv, pred_methods, metrics_dir=None):
    """Evaluate every device/model combination on every dataset.

    For each dataset named in ``datasets`` (loaded from ``data/<name>.csv``),
    each row of the device/model combination table at ``dev_model_csv``, and
    each prediction method in ``pred_methods``, builds a
    ``MultipleDeviceClassifier`` and evaluates it via ``eval_classifier``.

    Parameters
    ----------
    datasets : iterable of dataset base names (without ``.csv``).
    dev_model_csv : path to a CSV whose rows map devices to model pickles.
    pred_methods : iterable of prediction-method identifiers.
    metrics_dir : directory for metric output. BUG FIX: this was a free
        variable in the original (NameError unless a module global existed);
        it is now an explicit, backward-compatible keyword parameter,
        matching run_experiment_with_datasets_devices.
    """
    y_col = 'device_category'
    use_cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
    metrics_headers = list(pd.read_csv(os.path.abspath('data/metrics_headers.csv')))
    dev_model_combos = pd.read_csv(dev_model_csv)
    for dataset_name in datasets:
        print('@', dataset_name)
        dataset = utils.load_data_from_csv('data/{}.csv'.format(dataset_name), use_cols=use_cols)
        for idx, dev_model_combo in dev_model_combos.iterrows():
            print('@@', dev_model_combo)
            for pred_method in pred_methods:
                multi_dev_cls = MultipleDeviceClassifier(
                    dev_model_combo,
                    is_model_pkl=True,
                    pred_method=pred_method,
                    use_cols=use_cols,
                    y_col=y_col)
                # NOTE(review): model_name, seq_len and opt_seq_len all
                # receive the same dict of the combo row — looks suspicious
                # but is preserved from the original; confirm intent.
                eval_classifier(
                    classifier=multi_dev_cls,
                    dataset=dataset,
                    model_name=dev_model_combo.to_dict(),
                    dataset_name=dataset_name,
                    classification_method='multi_dev',
                    seq_len=dev_model_combo.to_dict(),
                    opt_seq_len=dev_model_combo.to_dict(),
                    metrics_headers=metrics_headers,
                    metrics_dir=metrics_dir)
def eval_on_dataset(self, dataset, is_dataset_csv=False):
    """Evaluate this classifier on ``dataset`` and return the metrics.

    When ``is_dataset_csv`` is True, ``dataset`` is a CSV path which is
    first loaded with the classifier's configured column subset.
    """
    if is_dataset_csv:
        dataset = utils.load_data_from_csv(dataset, self.use_cols)
    # Separate features from the label column, predict, then score.
    features, labels = utils.split_data(dataset, self.y_col)
    predictions = self.predict(features)
    return utils.eval_predictions(labels, predictions)
def get_relation_data(self):
    """Return a user-by-page likes matrix joined with the output labels.

    Users with no likes among the selected pages have their feature
    columns filled with the per-column mean.
    """
    likes_df, labels_df = utils.load_data_from_csv(dtype="relation")
    # Sparse user x page matrix, keeping only pages with at least 5 likes.
    likes_matrix = utils.get_transformed_relation(likes_df, min_likes=5)
    likes_matrix = pd.merge(likes_matrix, labels_df, left_on="userid", right_on="userid", how="outer")
    # Outer join leaves NaNs for users absent from the likes matrix;
    # impute those with the column means.
    likes_matrix.fillna(likes_matrix.mean(), inplace=True)
    return likes_matrix
def run_experiment_with_datasets_devices(exp, datasets, devices, models_dir, metrics_dir):
    """Invoke experiment callback ``exp`` for every dataset/device/model.

    Iterates each dataset CSV under ``data/``, then each device, then each
    pickled model found under ``models_dir/<device>/``, calling ``exp`` with
    the shared label column, column list, metric headers and naming info.
    """
    label_col = 'device_category'
    cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
    headers = list(pd.read_csv(os.path.abspath('data/metrics_headers.csv')))
    for ds_name in datasets:
        print('@', ds_name)
        ds = utils.load_data_from_csv('data/{}.csv'.format(ds_name), use_cols=cols)
        for device in devices:
            print('@@', device)
            for pkl_file in os.listdir(os.path.join(models_dir, device)):
                model_id = os.path.splitext(pkl_file)[0]
                print('@@@', model_id)
                exp(label_col, cols, headers, models_dir, metrics_dir,
                    ds_name, ds, device, pkl_file, model_id)
def __init__(self, dev_name, model, is_model_pkl=False, opt_seq_len=1, use_cols=None, y_col=None, train=None, is_train_csv=False, validation=None, is_validation_csv=False):
    """Initialize the classifier and determine its optimal sequence length.

    Delegates model/column/label/training setup to the parent class, then
    either derives the optimal sequence length from a validation set (when
    both training and validation data were supplied) or falls back to the
    explicit ``opt_seq_len`` argument.

    Parameters
    ----------
    opt_seq_len : sequence length to use when no validation data is given.
    validation : optional validation data — a DataFrame, or a CSV path
        when ``is_validation_csv`` is True.
    is_validation_csv : treat ``validation`` as a CSV path when True.
    (Remaining parameters are forwarded unchanged to the parent __init__.)
    """
    super().__init__(dev_name=dev_name,
                     model=model,
                     is_model_pkl=is_model_pkl,
                     use_cols=use_cols,
                     y_col=y_col,
                     train=train,
                     is_train_csv=is_train_csv)
    # BUG FIX: the original tested `if train and validation:`, which raises
    # ValueError when either is a pandas DataFrame (ambiguous truth value).
    # Compare against None instead.
    if train is not None and validation is not None:
        if is_validation_csv:
            validation = utils.load_data_from_csv(validation, use_cols)
        self.find_opt_seq_len(validation)
    else:
        self.opt_seq_len = opt_seq_len
def get_text_data(self, target):
    """Load the text dataset and split it into features and labels for ``target``."""
    text_df, _ = utils.load_data_from_csv(dtype="text")
    text_df, labels = utils.extract_data(text_df, target, type="text")
    return text_df, labels
def get_face_data(self, target):
    """Load the face dataset and split it into features and labels for ``target``."""
    face_df, _ = utils.load_data_from_csv(dtype="face")
    face_df, labels = utils.extract_data(face_df, target, type="face")
    return face_df, labels
def fetch_node2vec_data(self):
    """Placeholder for node2vec features.

    The relation data is loaded but not yet transformed; an empty string
    stands in for the eventual embedding frame.
    """
    relation_df, output_df = utils.load_data_from_csv(dtype="relation")
    embedding_placeholder = ""
    return embedding_placeholder
def fetch_text_data(self):
    """Load the text dataset and return it preprocessed."""
    text_df, _ = utils.load_data_from_csv(dtype="text")
    return preprocess(text_df, dtype="text")
def fetch_face_data(self):
    """Load the face dataset and return it preprocessed."""
    face_df, _ = utils.load_data_from_csv(dtype="face")
    return preprocess(face_df, dtype="face")
def fetch_text_data(self):
    """Load and return the raw text dataset (the companion output frame is discarded)."""
    text_df, _output_df = utils.load_data_from_csv(dtype="text")
    return text_df
def fetch_face_data(self):
    """Load and return the raw face dataset (the companion output frame is discarded)."""
    face_df, _output_df = utils.load_data_from_csv(dtype="face")
    return face_df