def test_dame_gender_dataset2genderlist_method_returns_correct_result(
        self):
    """Check the leading entries produced by ``Gender.dataset2genderlist``.

    A single ``Gender`` instance converts two datasets in turn, and the
    first four items of each resulting list are compared against the
    expected values.
    """
    gender = Gender()
    # (dataset path, expected first four items of the resulting list)
    # NOTE(review): the values are presumably gender codes -- confirm the
    # 0/1 encoding against Gender.dataset2genderlist.
    expected_heads = (
        ("files/names/all.csv", [1, 1, 1, 1]),
        ("files/names/yob2017.txt", [0, 0, 0, 0]),
    )
    for dataset_path, expected in expected_heads:
        gender_list = gender.dataset2genderlist(dataset=dataset_path)
        self.assertEqual(gender_list[:4], expected)
elif (args.categorical == "noletters"): g.features_list2csv(categorical="noletters", path="files/names/allnoundefined.csv") features = "files/features_list_cat.csv" elif (args.categorical == "nocategorical"): g.features_list2csv(categorical="nocategorical", path="files/names/allnoundefined.csv") features = "files/features_list_no_cat.csv" else: g.features_list2csv(categorical="both", path="files/names/all.csv") features = "files/features_list.csv" ## STEP1: N COMPONENTS + 1 TARGET x = pd.read_csv(features) #print(x.columns) y = g.dataset2genderlist(dataset="files/names/allnoundefined.csv") #print(y) # STEP2: ADDING TARGET target = pd.DataFrame(data=y, columns=['target component']) finalDf = x.join(target) # STEP3: NORMALIZE DATA from sklearn import preprocessing data_scaled = pd.DataFrame(preprocessing.scale(finalDf), columns=finalDf.columns) # STEP4: PCA pca = PCA(n_components=int(args.components)) pca.fit_transform(data_scaled)