Example #1
0
def startjob(
        csv_file_path=r'C:\Users\57855\Desktop\2%test.csv',  #训练文件的路径
        model_file=r'C:\Users\57855\Desktop\2%.yaml',  #模型配置文件路径
        test_file=r'C:\Users\57855\Desktop\2%test_data.csv'):  #结果输出路径
    #Lugwig教程上的代码
    with open(model_file, encoding='utf-8', mode='r') as file:
        model_definition = yaml.load(file.read())
        print(model_definition)
        ludwig_model = LudwigModel(model_definition)
        train_stats = ludwig_model.train(csv_file_path,
                                         logging_level=logging_DEBUG)
        print(train_stats)
        predictions = ludwig_model.predict(test_file,
                                           logging_level=logging_DEBUG)
        print(predictions)
        ludwig_model.close()
Example #2
0
    def train(self):
        training_dataframe, model_definition = self._create_ludwig_dataframe(
            'train')
        if self.transaction.lmd['model_order_by'] is None:
            timeseries_cols = []
        else:
            timeseries_cols = list(
                map(lambda x: x[0], self.transaction.lmd['model_order_by']))

        if len(timeseries_cols) > 0:
            training_dataframe, model_definition = self._translate_df_to_timeseries_format(
                training_dataframe, model_definition, timeseries_cols, 'train')

        with disable_ludwig_output():

            model = LudwigModel(model_definition)

            # <---- Ludwig currently broken, since mode can't be initialized without train_set_metadata and train_set_metadata can't be obtained without running train... see this issue for any updates on the matter: https://github.com/uber/ludwig/issues/295
            #model.initialize_model(train_set_metadata={})
            #train_stats = model.train_online(data_df=training_dataframe) # ??Where to add model_name?? ----> model_name=self.transaction.lmd['name']

            if self.transaction.lmd['rebuild_model'] is True:
                train_stats = model.train(
                    data_df=training_dataframe,
                    model_name=self.transaction.lmd['name'],
                    skip_save_model=True)
            else:
                model = LudwigModel.load(
                    self.transaction.lmd['ludwig_data']['ludwig_save_path'])
                train_stats = model.train(
                    data_df=training_dataframe,
                    model_name=self.transaction.lmd['name'],
                    skip_save_model=True)
                #,model_load_path=self.transaction.lmd['ludwig_data']['ludwig_save_path'])

            for k in train_stats['train']:
                if k not in self.transaction.lmd['model_accuracy']['train']:
                    self.transaction.lmd['model_accuracy']['train'][k] = []
                    self.transaction.lmd['model_accuracy']['test'][k] = []
                elif k is not 'combined':
                    # We should be adding the accuracy here but we only have it for combined, so, for now use that, will only affect multi-output scenarios anyway
                    pass
                else:
                    self.transaction.lmd['model_accuracy']['train'][k].extend(
                        train_stats['train'][k]['accuracy'])
                    self.transaction.lmd['model_accuracy']['test'][k].extend(
                        train_stats['test'][k]['accuracy'])
                '''
                @ TRAIN ONLINE BIT That's not working
                model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])
                for i in range(0,100):
                    train_stats = model.train_online(data_df=training_dataframe)
                    # The resulting train_stats are "None"... wonderful -_-
                '''

        ludwig_model_savepath = Config.LOCALSTORE_PATH.rstrip(
            'local_jsondb_store') + self.transaction.lmd['name']

        model.save(ludwig_model_savepath)
        model.close()

        self.transaction.lmd['ludwig_data'] = {
            'ludwig_save_path': ludwig_model_savepath
        }
        self.transaction.hmd['ludwig_data'] = {
            'model_definition': model_definition
        }
from ludwig import LudwigModel
import pandas as pd

df = pd.read_csv('Tweets.csv')
print(df.head())

model_definition = {
    'input_features':[
        {'name':'text', 'type':'text'},
    ],
    'output_features': [
        {'name': 'airline_sentiment', 'type': 'category'}
    ]
}

print('creating model')
model = LudwigModel(model_definition)
print('training model')
train_stats = model.train(data_df=df)
model.close()