if clf_package == 'libsvm' and not clf_name == 'SVM':
    print('libsvm only supports the SVM classifier')
    exit()

method = sys.argv[5]
if not (method == 'forward' or method == 'backward'):
    print('fifth argument must be the selection method (forward or backward)')
    exit()

index_df = read_csv(os.path.expanduser(
    './Dataset/' + dataset + '/index/' + algthm_name + '_' + clf_name + '.csv'))
feat_ind = index_df[index_df.columns[:-1]].values[0]

# Read data set from file
dir_path = './Dataset/' + dataset + '/'
datafile = dir_path + dataset + '.csv'
X, y = load(datafile, True if dataset == 'HDR' else False)
if dataset == 'ARR':
    y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal, other cases are abnormal

# Data preprocessing
X = DataPreprocessing(X, dataset)

# Setting of classifier
if clf_name == 'NB':
    # clf = GaussianNB()
    # clf = MultinomialNB(fit_prior=False)
    # X += 1
    clf = BernoulliNB()
elif clf_name == 'SVM':
    clf = SVC(kernel='linear', C=1)
elif clf_name == 'LDA':
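# Hedged usage note (argument order inferred from the sys.argv checks above;
# the script name and example values are illustrative, not confirmed):
#   python <script>.py <dataset> <algorithm> <classifier> <package> <method>
#   e.g. python <script>.py ARR mRMR SVM sklearn forward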
def getData():
    '''LOAD TRAIN DATA'''
    # Load train data: X
    featureData = load_features('input')
    XtrainAll = featureData['TRAIN_F']
    Xcols = XtrainAll.columns.tolist()
    # business_id, r_mean, r_sd, g_mean, g_sd, b_mean, b_sd, imagecount,
    # h_mean, h_sd, w_mean, w_sd

    # Load train data: Y
    data = load('input')
    YtrainAll = data['Y_TRAIN']
    Ycols = YtrainAll.columns.tolist()  # business_id, 0, 1, 2, 3, 4, 5, 6, 7, 8

    '''SPLIT TRAINALL INTO TRAIN AND VALIDATION SET'''
    # Merge X and Y. Reasons: the order should be the same, and the labels
    # could contain businesses that were removed during preprocessing.
    trainAllData = pd.merge(XtrainAll, YtrainAll, on='business_id')

    # Load which business ids belong to the train set and which to the validation set
    trainSetIds = np.load('input/trainSet.npy')
    validSetIds = np.load('input/verifSet.npy')

    # Create dataframes of photo indices for the train and validation set
    trainData = trainAllData[trainAllData.business_id.isin(trainSetIds)]
    validationData = trainAllData[trainAllData.business_id.isin(validSetIds)]

    # Save the business_id order of all sets
    busIdsTrainAll = trainAllData['business_id'].values
    busIdsTrain = trainData['business_id'].values
    busIdsVal = validationData['business_id'].values

    # Split X and Y data, remove business_ids
    del Xcols[0]  # remove business_id from the list
    del Ycols[0]  # remove business_id from the list
    XtrainAll = trainAllData[Xcols].values
    YtrainAll = trainAllData[Ycols].values
    Xtrain = trainData[Xcols].values
    Ytrain = trainData[Ycols].values
    Xvalidation = validationData[Xcols].values
    Yvalidation = validationData[Ycols].values

    '''LOAD TEST DATA'''
    # Create an array from the test data
    XtestDF = featureData['TEST_F']
    Xtest = XtestDF[Xcols].values

    '''SAVE ALL DATA IN A DICTIONARY'''
    data = {
        'Xtrain': Xtrain, 'busIdsTrain': busIdsTrain,
        'XtrainAll': XtrainAll, 'busIdsTrainAll': busIdsTrainAll,
        'Xvalidation': Xvalidation, 'busIdsVal': busIdsVal,
        'Ytrain': Ytrain, 'YtrainAll': YtrainAll, 'Yvalidation': Yvalidation,
        'Xtest': Xtest, 'XtestDF': XtestDF,
    }
    return data
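# Hedged usage sketch (assumptions: scikit-learn is available and the label
# matrix is a multilabel 0/1 indicator; OneVsRestClassifier and
# LogisticRegression are illustrative choices, not this project's confirmed model):
if __name__ == '__main__':
    from sklearn.multiclass import OneVsRestClassifier
    from sklearn.linear_model import LogisticRegression

    data = getData()
    model = OneVsRestClassifier(LogisticRegression(max_iter=1000))
    model.fit(data['Xtrain'], data['Ytrain'])  # one binary model per label
    # score() on multilabel targets reports subset accuracy
    print(model.score(data['Xvalidation'], data['Yvalidation']))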
    clf = BernoulliNB()
elif clf_name == 'SVM':
    clf = SVC(kernel='linear', C=1)
elif clf_name == 'LDA':
    clf = LDA()
else:
    raise Exception('Incorrect setting of classifier: {}'.format(clf_name))

logger.info('dataset')
logger.info(dataset)
logger.info('clf_name')
logger.info(str(clf))
# logger.info('clf_package')
# logger.info(clf_package)

# Read data set from file
X, y = load(datafile, is_shuffle=False if dataset == 'ARR' else True)
if dataset == 'ARR':
    y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal, other cases are abnormal
logger.debug('X')
logger.debug(X)
logger.debug('y')
logger.debug(y)

# Data preprocessing
X = DataPreprocessing(X, dataset)
logger.debug('X after preprocessing')
logger.debug(X)

n_sample = X.shape[0]

# Run mRMR algorithm
error_mean = []
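# Hedged sketch of a greedy mRMR-style selection step (a generic illustration
# under assumed inputs, not this project's implementation): repeatedly pick the
# feature with maximal relevance to y minus mean redundancy with the features
# already selected.
import numpy as np

def mrmr_greedy(relevance, redundancy, k):
    """relevance: (n_feat,) feature-target scores;
    redundancy: (n_feat, n_feat) pairwise feature-feature scores."""
    selected = [int(np.argmax(relevance))]
    while len(selected) < k:
        rest = [j for j in range(len(relevance)) if j not in selected]
        scores = [relevance[j] - np.mean([redundancy[j][s] for s in selected])
                  for j in rest]
        selected.append(rest[int(np.argmax(scores))])
    return selected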
        ft_1_level = [self.level1(name, nodes) for name in self.features["1-level"] if name not in filters] + \
                     [self.level1(name, nodes) for name in self.features["3-level"] if name not in filters]
        ft_2_level = [self.level2(name, nodes) for name in self.features["3-level"] if name not in filters]
        ft_3_level = [self.level3(name, nodes) for name in self.features["3-level"] if name not in filters]
        return np.array(ft_1_level + ft_2_level + ft_3_level)


if __name__ == "__main__":
    import time
    import sys
    from tqdm import tqdm
    from LoadData import load

    exitFlag = 0
    G, train, test, disconnected = load()
    precompute = precomputeData(G, recompute=False)
    GFE = GraphFeaturesExtractor(G, precompute)

    if sys.argv[1] == "train":
        data = train
        idx = np.load('precompute/order.npy')
        save_dir = "generated_data/embs_train/"
    if sys.argv[1] == "test":
        data = test
        idx = np.load('precompute/order_test.npy')
        save_dir = "generated_data/embs_test/"

    i = int(sys.argv[2])
    sub_size = idx.shape[0] // 4
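    # Hedged sketch (an assumption about the processing loop that follows, not
    # the original code): worker i handles the slice idx[i*sub_size:(i+1)*sub_size]
    # and caches one .npy file of features per chunk. The helper, the file-name
    # pattern, and the extract_fn callback are illustrative names.
    import os

    def process_chunk(idx, i, sub_size, save_dir, extract_fn):
        """Extract features for one slice of pairs and cache them to disk."""
        os.makedirs(save_dir, exist_ok=True)
        chunk = idx[i * sub_size:(i + 1) * sub_size]
        feats = np.array([extract_fn(pair) for pair in tqdm(chunk)])
        np.save(os.path.join(save_dir, 'embs_{}.npy'.format(i)), feats)

    # e.g. process_chunk(idx, i, sub_size, save_dir, <GFE feature method>)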
#     exit()

method = sys.argv[4]
if not (method == 'forward' or method == 'backward'):
    print('fourth argument must be the selection method (forward or backward)')
    exit()

index_df = read_csv(
    os.path.expanduser('./Dataset/' + dataset + '/index/' + algthm_name + '_' + clf_name + '.csv'))
feat_ind = index_df[index_df.columns[:-1]].values[0]

# Read data set from file
dir_path = './Dataset/' + dataset + '/'
datafile = dir_path + dataset + '.csv'
X, y = load(datafile, True if dataset == 'HDR' else False)
if dataset == 'ARR':
    y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal, other cases are abnormal

# Data pre-processing
X = DataPreprocessing(X, dataset)

# Setting of classifier
if clf_name == 'NB':
    # clf = GaussianNB()
    # clf = MultinomialNB(fit_prior=False)
    clf = BernoulliNB()
elif clf_name == 'SVM':
    clf = SVC(kernel='linear', C=1)
elif clf_name == 'LDA':
# ==============================
proportion_test = 0.1  # use 10% of the data to test
region_num = 5  # how many regions a map contains

# Create inter_data directories
path_data = 'data'  # the folder for caching data
path_model = 'model'  # the folder for caching models
cache_name = 'inter_data'  # cache folder name
result_name = 'result'  # result folder name

# Create the folders if they do not exist yet
if os.path.isdir(cache_name) is False:
    os.mkdir(cache_name)

cache_path = r'/home/ryj/renyajie/exp/GLST_Net/inter_data'
cache_data_path = os.path.join(cache_path, path_data)
cache_model_path = os.path.join(cache_path, path_model)
cache_result = os.path.join(cache_path, result_name)

if os.path.isdir(cache_data_path) is False:
    os.mkdir(cache_data_path)
if os.path.isdir(cache_model_path) is False:
    os.mkdir(cache_model_path)
if os.path.isdir(cache_result) is False:
    os.mkdir(cache_result)


if __name__ == '__main__':
    from LoadData import load
    load(proportion_test)  # the constant defined above is proportion_test, not test_proportion
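# A compact alternative to the isdir/mkdir pairs above (a sketch with the same
# effect, not the original code): os.makedirs also creates missing parents
# and, with exist_ok=True, is a no-op when the directory already exists.
def ensure_dirs(*paths):
    for p in paths:
        os.makedirs(p, exist_ok=True)

# e.g. ensure_dirs(cache_data_path, cache_model_path, cache_result)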
    # clf = MultinomialNB(fit_prior=False)
    clf = BernoulliNB()
elif clf_name == 'SVM':
    clf = SVC(kernel='linear', C=1)
elif clf_name == 'LDA':
    clf = LDA()

logger.info('dataset')
logger.info(dataset)
logger.info('clf_name')
logger.info(str(clf))
logger.info('clf_package')
logger.info(clf_package)

# Read data set from file
X, y = load(datafile, False if dataset == 'ARR' else True)
if dataset == 'ARR':
    y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal, other cases are abnormal
logger.debug('X')
logger.debug(X)
logger.debug('y')
logger.debug(y)

# Data preprocessing
X = DataPreprocessing(X, dataset)
logger.debug('X after preprocessing')
logger.debug(X)

n_sample = X.shape[0]

# Run mRMR algorithm
error_mean = []