Example #1
	if clf_package == 'libsvm' and clf_name != 'SVM':
		print('libsvm only supports the SVM classifier')
		exit()

	method = sys.argv[5]
	if method not in ('forward', 'backward'):
		print("fifth argument must be the selection method: 'forward' or 'backward'")
		exit()

	index_df = read_csv(os.path.expanduser('./Dataset/' + dataset + '/index/' + algthm_name + '_' + clf_name + '.csv'))
	feat_ind = index_df[index_df.columns[:-1]].values[0]

	# Read data set from file
	dir_path = './Dataset/' + dataset + '/'
	datafile = dir_path + dataset + '.csv'
	X, y = load(datafile, dataset == 'HDR')
	if dataset == 'ARR':
		y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal; other cases are abnormal

	# Data preprocessing
	X = DataPreprocessing(X, dataset)

	# Setting of classifier
	if clf_name == 'NB':
		#clf = GaussianNB()
		#clf = MultinomialNB(fit_prior=False)
		#X += 1
		clf = BernoulliNB()
	elif clf_name == 'SVM':
		clf = SVC(kernel='linear', C=1)
	elif clf_name == 'LDA':
		clf = LDA()
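As a side note, the manual sys.argv checks above (and in Example #5 below) could be expressed with argparse; a minimal sketch, covering only the method argument:

import argparse

parser = argparse.ArgumentParser()
# choices=... rejects anything other than 'forward' or 'backward' automatically
parser.add_argument('method', choices=['forward', 'backward'],
                    help='feature-selection direction')
args = parser.parse_args()

argparse exits with a usage message on invalid input, replacing the manual print/exit pair.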
Example #2
import numpy as np
import pandas as pd
# load_features and load are the project's data-loading helpers;
# their imports were not shown in the original snippet


def getData():
    '''LOAD TRAIN DATA'''

    # Load train data: X
    featureData = load_features('input')
    XtrainAll = featureData['TRAIN_F']
    Xcols = XtrainAll.columns.tolist()  # business_id, r_mean, r_sd, g_mean, g_sd, b_mean, b_sd, imagecount, h_mean, h_sd, w_mean, w_sd

    # Load train data: Y
    data = load('input')
    YtrainAll = data['Y_TRAIN']
    Ycols = YtrainAll.columns.tolist()  #business_id, 0, 1, 2, 3, 4, 5, 6, 7, 8
    '''SPLIT TRAINALL INTO TRAIN AND VALIDATION SETS'''

    # Merge X and Y: keeps row order aligned and drops labels for businesses removed during preprocessing
    trainAllData = pd.merge(XtrainAll, YtrainAll, on='business_id')

    #load which business ids should be in the train set and which should be in the validation set
    trainSetIds = np.load('input/trainSet.npy')
    validSetIds = np.load('input/verifSet.npy')

    #create dataframes of photo indices for train and validation set
    trainData = trainAllData[trainAllData.business_id.isin(trainSetIds)]
    validationData = trainAllData[trainAllData.business_id.isin(validSetIds)]

    #save business_id order of all sets
    busIdsTrainAll = trainAllData['business_id'].values
    busIdsTrain = trainData['business_id'].values
    busIdsVal = validationData['business_id'].values

    #split X and Y data, remove business_ids
    del Xcols[0]  #remove business_id from list
    del Ycols[0]  #remove business_id from list
    XtrainAll = trainAllData[Xcols].values
    YtrainAll = trainAllData[Ycols].values
    Xtrain = trainData[Xcols].values
    Ytrain = trainData[Ycols].values
    Xvalidation = validationData[Xcols].values
    Yvalidation = validationData[Ycols].values
    '''LOAD TEST DATA'''

    #create array from test data
    XtestDF = featureData['TEST_F']
    Xtest = XtestDF[Xcols].values
    '''SAVE ALL DATA IN DICTIONARY'''

    data = {
        'Xtrain': Xtrain,
        'busIdsTrain': busIdsTrain,
        'XtrainAll': XtrainAll,
        'busIdsTrainAll': busIdsTrainAll,
        'Xvalidation': Xvalidation,
        'busIdsVal': busIdsVal,
        'Ytrain': Ytrain,
        'YtrainAll': YtrainAll,
        'Yvalidation': Yvalidation,
        'Xtest': Xtest,
        'XtestDF': XtestDF,
    }
    return data
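A minimal usage sketch (not from the source): unpack the dictionary returned by getData() and fit a model on the train split, evaluating on the held-out validation businesses. The RandomForestClassifier is an illustrative stand-in, not the project's actual model.

from sklearn.ensemble import RandomForestClassifier

data = getData()
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(data['Xtrain'], data['Ytrain'])       # labels are the attribute columns 0-8
val_pred = clf.predict(data['Xvalidation'])   # evaluate on held-out businesses
test_pred = clf.predict(data['Xtest'])        # predictions for the test businesses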
Example #3
		clf = BernoulliNB()
	elif clf_name == 'SVM':
		clf = SVC(kernel='linear', C=1)
	elif clf_name == 'LDA':
		clf = LDA()
	else:
		raise Exception('Incorrect setting of classifier: {}'.format(clf_name))
	logger.info('dataset')
	logger.info(dataset)
	logger.info('clf_name')
	logger.info(str(clf))
	# logger.info('clf_package')
	# logger.info(clf_package)

	# Read data set from file
	X, y = load(datafile, is_shuffle=(dataset != 'ARR'))
	if dataset == 'ARR':
		y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal; other cases are abnormal
	logger.debug('X')
	logger.debug(X)
	logger.debug('y')
	logger.debug(y)

	# Data preprocessing
	X = DataPreprocessing(X, dataset)
	logger.debug('X after preprocessing')
	logger.debug(X)
	n_sample = X.shape[0]

	# Run mRMR algorithm
	error_mean = []
Example #4
        ft_1_level = [self.level1(name, nodes) for name in self.features["1-level"] if name not in filters] + \
                     [self.level1(name, nodes) for name in self.features["3-level"] if name not in filters]

        ft_2_level = [self.level2(name, nodes) for name in self.features["3-level"] if name not in filters]
        ft_3_level = [self.level3(name, nodes) for name in self.features["3-level"] if name not in filters]

        return np.array(ft_1_level + ft_2_level + ft_3_level)

if __name__ == "__main__":
    import time
    from tqdm import tqdm
    exitFlag = 0

    from LoadData import load
    G, train, test, disconnected = load()
    precompute = precomputeData(G, recompute=False)
    GFE = GraphFeaturesExtractor(G, precompute)

    import sys
    if sys.argv[1] == "train":
        data = train
        idx = np.load('precompute/order.npy')
        save_dir = "generated_data/embs_train/"
    elif sys.argv[1] == "test":
        data = test
        idx = np.load('precompute/order_test.npy')
        save_dir = "generated_data/embs_test/"

    i = int(sys.argv[2])
    sub_size = idx.shape[0] // 4
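The snippet ends here; a plausible continuation (an assumption, not the source's code) would slice the i-th of the four chunks and save its embeddings under save_dir. GFE.extract_pair is a hypothetical method name standing in for whatever per-pair routine GraphFeaturesExtractor exposes.

    start = i * sub_size
    end = idx.shape[0] if i == 3 else (i + 1) * sub_size  # last chunk absorbs the remainder
    embs = []
    for j in tqdm(range(start, end)):
        # extract_pair is hypothetical; the real per-pair routine is not shown above
        embs.append(GFE.extract_pair(data[idx[j]]))
    np.save(save_dir + 'embs_%d.npy' % i, np.array(embs))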
Example #5
    # 	exit()

    method = sys.argv[4]
    if method not in ('forward', 'backward'):
        print("fourth argument must be the method: 'forward' or 'backward'")
        exit()

    index_df = read_csv(
        os.path.expanduser('./Dataset/' + dataset + '/index/' + algthm_name +
                           '_' + clf_name + '.csv'))
    feat_ind = index_df[index_df.columns[:-1]].values[0]

    # Read data set from file
    dir_path = './Dataset/' + dataset + '/'
    datafile = dir_path + dataset + '.csv'
    X, y = load(datafile, dataset == 'HDR')
    if dataset == 'ARR':
        y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal; other cases are abnormal

    # Data pre-processing
    X = DataPreprocessing(X, dataset)

    # Setting of classifier
    if clf_name == 'NB':
        # clf = GaussianNB()
        # clf = MultinomialNB(fit_prior=False)
        clf = BernoulliNB()
    elif clf_name == 'SVM':
        clf = SVC(kernel='linear', C=1)
    elif clf_name == 'LDA':
        clf = LDA()
Example #6
import os

# ==============================
proportion_test = 0.1  # use 10% of the data for testing
region_num = 5  # how many regions a map contains

# create inter_data
path_data = 'data'  # directory for cached data
path_model = 'model'  # directory for cached models
cache_name = 'inter_data'  # cache directory name
result_name = 'result'  # result directory name

# create file
if not os.path.isdir(cache_name):
    os.mkdir(cache_name)

cache_path = r'/home/ryj/renyajie/exp/GLST_Net/inter_data'
cache_data_path = os.path.join(cache_path, path_data)
cache_model_path = os.path.join(cache_path, path_model)
cache_result = os.path.join(cache_path, result_name)

if not os.path.isdir(cache_data_path):
    os.mkdir(cache_data_path)
if not os.path.isdir(cache_model_path):
    os.mkdir(cache_model_path)
if not os.path.isdir(cache_result):
    os.mkdir(cache_result)

if __name__ == '__main__':
    from LoadData import load
    load(proportion_test)
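As an aside on the directory setup above, the repeated isdir/mkdir checks can be collapsed with os.makedirs(exist_ok=True); a minimal equivalent sketch:

import os

cache_path = r'/home/ryj/renyajie/exp/GLST_Net/inter_data'
for sub in ('data', 'model', 'result'):
    # makedirs creates missing parents and is a no-op when the directory exists
    os.makedirs(os.path.join(cache_path, sub), exist_ok=True)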
Example #7
		#clf = MultinomialNB(fit_prior=False)
		clf = BernoulliNB()
	elif clf_name == 'SVM':
		clf = SVC(kernel='linear', C=1)
	elif clf_name == 'LDA':
		clf = LDA()

	logger.info('dataset')
	logger.info(dataset)
	logger.info('clf_name')
	logger.info(str(clf))
	logger.info('clf_package')
	logger.info(clf_package)

	# Read data set from file
	X, y = load(datafile, dataset != 'ARR')
	if dataset == 'ARR':
		y = [1 if yi == 1 else -1 for yi in y]  # 1 means normal; other cases are abnormal
	logger.debug('X')
	logger.debug(X)
	logger.debug('y')
	logger.debug(y)

	# Data preprocessing
	X = DataPreprocessing(X, dataset)
	logger.debug('X after preprocessing')
	logger.debug(X)
	n_sample = X.shape[0]

	# Run mRMR algorithm
	error_mean = []