def run(self, dirpath, file):
    """Extract every entry of ``dirpath`` with ``unzip`` and delete the ones that succeed.

    A fresh ``common.Common()`` is stored on ``self.common`` and asked
    (``createDirByKey(file, self.key)``) for the directory that is handed to
    ``unzip -d`` as the extraction target.  Each entry returned by
    ``os.listdir(dirpath)`` is then passed to ``unzip -jxo``; an entry is
    removed from ``dirpath`` only when ``unzip`` exits with status 0.
    Entries that fail to extract are left in place.

    Args:
        dirpath: Directory whose entries are treated as zip archives.
        file: Forwarded unchanged to ``createDirByKey``.

    Returns:
        Always ``True``, regardless of how many archives were extracted.
    """
    # Function-scope import: the file's import block is not visible from
    # this block, so the dependency is brought in locally.
    import subprocess

    self.common = common.Common()
    root_path = self.common.createDirByKey(file, self.key)

    for entry in os.listdir(dirpath):
        archive_path = os.path.join(dirpath, entry)
        # Argument list instead of a shell string: names containing spaces
        # or shell metacharacters can no longer break or inject into the
        # command line.
        cmd = ["unzip", "-jxo", archive_path, "-d", root_path]
        print('cmd : %s' % ' '.join(cmd))
        try:
            status = subprocess.call(cmd)
        except OSError:
            # unzip could not be started at all; os.system() reported this
            # as a non-zero status, so keep treating it as a failed
            # extraction rather than raising.
            status = -1
        if status == 0:
            os.remove(archive_path)

    # NOTE(review): the original indentation was lost; the return is assumed
    # to follow the loop so that every entry is processed -- confirm.
    return True
def get_train_data(self):
    """Fit, score and hand off for saving the four room-name classifiers.

    Steps, in order:

    1. Read ``people.csv`` and ``peoplemaster.csv`` through
       ``TextClassificationPredict.readCSV`` and print the latter.
    2. Add a ``category_id`` column (factorized ``master_room_type``) and
       run ``TextClassificationPredict.clean_text`` over ``room_name``.
    3. Build three filtered frames that drop the rows whose ``view``,
       ``bedType`` or ``bed`` value is ``"Other"``.
    4. Split each frame with ``train_test_split(test_size=0.2,
       random_state=10)`` and fit one ``SVMModel().clf`` per label column
       (``master_room_type``, ``view``, ``bedType``, ``bed``) on
       ``room_name``.
    5. Print the accuracy of every classifier on its test split.
    6. Pass each fitted classifier to
       ``TextClassificationPredict.save_model`` together with a ``.pkl``
       path located in this module's directory.

    The method has no explicit return value.
    """
    # Instantiated exactly as before; the object is not used below, but the
    # constructor may have side effects that are not visible from here.
    helper = cm.Common()

    clean = TextClassificationPredict.clean_text

    # --- load -----------------------------------------------------------
    csv_name = "people.csv"
    raw_train = TextClassificationPredict.readCSV(csv_name)
    raw_master = TextClassificationPredict.readCSV("peoplemaster.csv")
    print(raw_master)

    frame = pd.DataFrame(raw_train)
    master_frame = pd.DataFrame(raw_master)

    # --- feature preparation --------------------------------------------
    frame['category_id'] = frame['master_room_type'].factorize()[0]
    # Per-class row counts; computed as before although nothing reads them.
    room_type_counts = pd.crosstab(index=raw_train["master_room_type"],
                                   columns="count")
    frame['room_name'] = frame["room_name"].apply(clean)
    master_frame['room_name'] = master_frame["room_name"].apply(clean)

    # Rows labelled "Other" are excluded from the three secondary tasks.
    view_frame = frame.drop(frame[frame['view'] == "Other"].index)
    bed_type_frame = frame.drop(frame[frame['bedType'] == "Other"].index)
    bed_frame = frame.drop(frame[frame['bed'] == "Other"].index)

    room_labels = raw_train['master_room_type']
    view_labels = view_frame['view']
    bed_type_labels = bed_type_frame['bedType']
    bed_labels = bed_frame['bed']

    # --- train / test split ---------------------------------------------
    # The training-side label vectors returned by the split are never read
    # (the fit calls below take labels from the training frames), so they
    # are discarded into ``_``.
    split_opts = {"test_size": 0.2, "random_state": 10}
    room_fit, room_eval, _, room_truth = train_test_split(
        frame, room_labels, **split_opts)
    view_fit, view_eval, _, view_truth = train_test_split(
        view_frame, view_labels, **split_opts)
    bed_type_fit, bed_type_eval, _, bed_type_truth = train_test_split(
        bed_type_frame, bed_type_labels, **split_opts)
    bed_fit, bed_eval, _, bed_truth = train_test_split(
        bed_frame, bed_labels, **split_opts)

    # --- fit --------------------------------------------------------------
    room_model = SVMModel()
    view_model = SVMModel()
    bed_type_model = SVMModel()
    bed_model = SVMModel()

    room_clf = room_model.clf.fit(room_fit["room_name"],
                                  room_fit.master_room_type)
    view_clf = view_model.clf.fit(view_fit["room_name"], view_fit.view)
    bed_type_clf = bed_type_model.clf.fit(bed_type_fit["room_name"],
                                          bed_type_fit.bedType)
    bed_clf = bed_model.clf.fit(bed_fit["room_name"], bed_fit.bed)

    # --- evaluate ---------------------------------------------------------
    # clean_text is applied to the test text again here, as in the original.
    room_pred = room_clf.predict(room_eval['room_name'].apply(clean))
    view_pred = view_clf.predict(view_eval['room_name'].apply(clean))
    bed_type_pred = bed_type_clf.predict(
        bed_type_eval['room_name'].apply(clean))
    bed_pred = bed_clf.predict(bed_eval['room_name'].apply(clean))

    reports = (
        ('accuracy %s', room_pred, room_truth),
        ('accuracyView %s', view_pred, view_truth),
        ('accuracyBedType %s', bed_type_pred, bed_type_truth),
        ('accuracyBed %s', bed_pred, bed_truth),
    )
    for template, guessed, truth in reports:
        print(template % accuracy_score(guessed, truth))

    # Class probabilities for the main task; the result is not read, but
    # the call is kept because it may raise for an unsupported estimator.
    room_proba = room_clf.predict_proba(room_eval["room_name"])

    # --- persist ----------------------------------------------------------
    def _beside_module(suffix):
        # Absolute path of ``suffix`` inside the directory of this module.
        return os.path.abspath(os.path.dirname(__file__)) + suffix

    artifacts = (
        ("/x_transformer.pkl", room_clf),
        ("/x_transformerView.pkl", view_clf),
        ("/x_transformerBedType.pkl", bed_type_clf),
        ("/x_transformerViewBed.pkl", bed_clf),
    )
    for suffix, estimator in artifacts:
        TextClassificationPredict.save_model(_beside_module(suffix),
                                             estimator)

    # Test rows paired with their predicted room type; built as before
    # although nothing reads the frame afterwards.
    scored = pd.DataFrame(room_eval)
    scored["predicted"] = room_pred
def run(self, dirname, file):
    """Delegate processing of ``file`` to ``Common.common_proc``.

    A new ``common.Common()`` instance is created, stored on
    ``self.common`` and called with ``dirname``, ``file``, ``self.key`` and
    ``self.module_filename``.

    Returns:
        Whatever ``common_proc`` returns.
    """
    self.common = common.Common()
    processor = self.common
    outcome = processor.common_proc(dirname, file, self.key,
                                    self.module_filename)
    return outcome