def startjob(
        csv_file_path=r'C:\Users\57855\Desktop\2%test.csv',  # path of the training CSV
        model_file=r'C:\Users\57855\Desktop\2%.yaml',  # path of the YAML model definition
        test_file=r'C:\Users\57855\Desktop\2%test_data.csv'):  # CSV the predictions are run on
    """Train a Ludwig model from a YAML definition and print its predictions.

    Adapted from the Ludwig tutorial code. The model definition, the training
    statistics and the predictions are printed to stdout; nothing is returned.

    Args:
        csv_file_path: Path of the CSV file the model is trained on.
        model_file: Path of the UTF-8 encoded YAML file holding the Ludwig
            model definition.
        test_file: Path of the CSV file handed to ``predict``.
    """
    # Imported locally so the function does not rely on the module's import block.
    import logging

    with open(model_file, encoding='utf-8', mode='r') as file:
        # safe_load instead of yaml.load: a model definition is plain
        # dicts/lists/scalars, and yaml.load without an explicit Loader is
        # deprecated (PyYAML >= 5.1) and rejected outright by PyYAML >= 6.
        model_definition = yaml.safe_load(file)
    print(model_definition)

    ludwig_model = LudwigModel(model_definition)
    try:
        # The inputs are file paths, so they go through `data_csv`; the first
        # positional parameter of train()/predict() is `data_df`, which the
        # rest of this file uses for DataFrames.
        # NOTE(review): confirm `data_csv` against the installed Ludwig version.
        train_stats = ludwig_model.train(
            data_csv=csv_file_path, logging_level=logging.DEBUG)
        print(train_stats)

        predictions = ludwig_model.predict(
            data_csv=test_file, logging_level=logging.DEBUG)
        print(predictions)
    finally:
        # Release the model even when training or prediction fails.
        ludwig_model.close()
def train(self):
    """Train the Ludwig model for the current transaction and save it.

    Steps:
        1. Build the training dataframe and model definition with
           ``_create_ludwig_dataframe('train')``.
        2. If ``lmd['model_order_by']`` names any columns, convert both to the
           time-series layout with ``_translate_df_to_timeseries_format``.
        3. Train a fresh ``LudwigModel`` when ``lmd['rebuild_model'] is True``,
           otherwise load the model stored at
           ``lmd['ludwig_data']['ludwig_save_path']`` and train that one.
        4. Update ``lmd['model_accuracy']`` from the returned statistics.
        5. Save the model and record the save path in ``lmd['ludwig_data']``
           and the model definition in ``hmd['ludwig_data']``.

    The model is closed even if training or saving raises.
    """
    training_dataframe, model_definition = self._create_ludwig_dataframe('train')

    order_by = self.transaction.lmd['model_order_by']
    # Only element [0] of each entry is used (presumably the column name of a
    # (column, ...) pair -- confirm against the code that fills model_order_by).
    timeseries_cols = [] if order_by is None else [entry[0] for entry in order_by]

    if timeseries_cols:
        training_dataframe, model_definition = self._translate_df_to_timeseries_format(
            training_dataframe, model_definition, timeseries_cols, 'train')

    with disable_ludwig_output():
        # Ludwig currently can't initialize a model without train_set_metadata,
        # and train_set_metadata can't be obtained without running train, so
        # initialize_model()/train_online() are not usable here. See
        # https://github.com/uber/ludwig/issues/295 for updates.
        # (A train_online loop on a loaded model was also tried; it returned
        # None instead of training statistics.)
        model = LudwigModel(model_definition)
        try:
            if self.transaction.lmd['rebuild_model'] is not True:
                # Continue from the previously saved model instead of the fresh one.
                model = LudwigModel.load(
                    self.transaction.lmd['ludwig_data']['ludwig_save_path'])

            train_stats = model.train(
                data_df=training_dataframe,
                model_name=self.transaction.lmd['name'],
                skip_save_model=True)

            for k in train_stats['train']:
                model_accuracy = self.transaction.lmd['model_accuracy']
                if k not in model_accuracy['train']:
                    # NOTE(review): a key seen for the first time only gets
                    # empty lists; nothing is recorded for it on this call.
                    # Looks unintended for 'combined' -- confirm.
                    model_accuracy['train'][k] = []
                    model_accuracy['test'][k] = []
                elif k == 'combined':
                    # Compared with ==/!= rather than `is`: string identity is
                    # an interpreter detail, not equality.
                    # Accuracy is only available for 'combined', so other keys
                    # are left untouched; this only affects multi-output
                    # scenarios.
                    model_accuracy['train'][k].extend(
                        train_stats['train'][k]['accuracy'])
                    model_accuracy['test'][k].extend(
                        train_stats['test'][k]['accuracy'])

            # Drop the trailing 'local_jsondb_store' component as a suffix.
            # str.rstrip() would treat the argument as a set of characters and
            # could eat into the preceding path component.
            store_suffix = 'local_jsondb_store'
            storage_root = Config.LOCALSTORE_PATH
            if storage_root.endswith(store_suffix):
                storage_root = storage_root[:-len(store_suffix)]
            ludwig_model_savepath = storage_root + self.transaction.lmd['name']

            model.save(ludwig_model_savepath)
        finally:
            model.close()

    self.transaction.lmd['ludwig_data'] = {
        'ludwig_save_path': ludwig_model_savepath
    }
    self.transaction.hmd['ludwig_data'] = {
        'model_definition': model_definition
    }
from ludwig import LudwigModel
import pandas as pd

# Minimal Ludwig example: train a model that maps the 'text' column of
# Tweets.csv to the 'airline_sentiment' category column.
df = pd.read_csv('Tweets.csv')
print(df.head())

model_definition = {
    'input_features': [
        {'name': 'text', 'type': 'text'},
    ],
    'output_features': [
        {'name': 'airline_sentiment', 'type': 'category'},
    ],
}

print('creating model')
model = LudwigModel(model_definition)
try:
    print('training model')
    train_stats = model.train(data_df=df)
finally:
    # Close the model even if training raises, so its resources are released.
    model.close()