Beispiel #1
0
def startjob(
        csv_file_path=r'C:\Users\57855\Desktop\2%test.csv',  # path of the training CSV file
        model_file=r'C:\Users\57855\Desktop\2%.yaml',  # path of the YAML model definition file
        test_file=r'C:\Users\57855\Desktop\2%test_data.csv'):  # CSV passed to predict() (originally labelled "result output path")
    """Train a Ludwig model from a YAML definition and print its predictions.

    Based on the code from the Ludwig tutorial.

    Args:
        csv_file_path: CSV file handed to ``LudwigModel.train``.
        model_file: UTF-8 encoded YAML file holding the model definition.
        test_file: CSV file handed to ``LudwigModel.predict``.

    Returns:
        None. The parsed definition, the training statistics and the
        predictions are printed.
    """
    # Function-scope import: the original referenced an undefined name
    # ``logging_DEBUG`` where the standard ``logging.DEBUG`` level is meant.
    import logging

    # Only parsing needs the file handle, so it is released before training.
    with open(model_file, encoding='utf-8', mode='r') as file:
        # safe_load: yaml.load() without an explicit Loader can construct
        # arbitrary objects and is rejected outright by PyYAML >= 6.
        model_definition = yaml.safe_load(file)
    print(model_definition)

    ludwig_model = LudwigModel(model_definition)
    try:
        train_stats = ludwig_model.train(csv_file_path,
                                         logging_level=logging.DEBUG)
        print(train_stats)
        predictions = ludwig_model.predict(test_file,
                                           logging_level=logging.DEBUG)
        print(predictions)
    finally:
        # Close the model even when training or prediction raises.
        ludwig_model.close()
Beispiel #2
0
from ludwig import LudwigModel

# Minimal definition: one text input feature (``doc_text``) and one
# category output feature (``class``).
model_definition = dict(
    input_features=[dict(name="doc_text", type="text")],
    output_features=[dict(name="class", type="category")],
)

# Build the model from the definition and train it on the gs:// CSV path.
ludwig_model = LudwigModel(model_definition)
train_stats = ludwig_model.train(
    data_csv="gs://skyl-dev-ml/playground/ludwig/train.csv",
)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--train-files',
        nargs='+',
        help='Training file local or GCS',
        default=
        'gs://skyl-dev-ml/aashishdahiya/flowers_aashishdahiya_testV3_20181205_134432/preproc/train*'
    )

    parser.add_argument(
        '--eval-files',
        nargs='+',
Beispiel #3
0
			"type": "sequence",
			"encoder": "rnn",
			"cell_type": "lstm",
			"bidirectional": true,
			"num_layers": 2,
			"reduce_output": null
		}
	],
	"output_features": [
		{
			"name": "intent",
			"type": "category",
			"reduce_input": "sum",
			"num_fc_layers": 1,
			"fc_size": 64
		}
	]
}"""

# Parse the JSON model definition held in ``data``.
model_definition = json.loads(data)

# Train a fresh model on the training CSV.
ludwig_model = LudwigModel(model_definition)
train_stats = ludwig_model.train(data_csv='train.csv')

# Persist the trained model, then load it back as a separate object.
ludwig_model.save(MODEL_PATH)
# The previous code aliased ``loaded_ludwig_model`` to ``ludwig_model`` and
# discarded the result of ``load()``, so predictions never came from the
# saved model. ``LudwigModel.load`` returns the loaded model, so bind it.
loaded_ludwig_model = LudwigModel.load(MODEL_PATH)
predictions = loaded_ludwig_model.predict(data_csv='test.csv')

print(predictions)
Beispiel #4
0
    def train(self):
        """Train the Ludwig model for the current transaction and save it.

        Steps, in order:

        1. Build the training dataframe and model definition via
           ``self._create_ludwig_dataframe('train')``.
        2. If ``lmd['model_order_by']`` names any columns, convert both to
           the timeseries format.
        3. Train a new model when ``lmd['rebuild_model']`` is ``True``,
           otherwise load the model from ``lmd['ludwig_data']
           ['ludwig_save_path']`` and train that one.
        4. Update the accuracy histories in ``lmd['model_accuracy']``.
        5. Save and close the model, then record the save path in
           ``lmd['ludwig_data']`` and the model definition in
           ``hmd['ludwig_data']``.

        Returns:
            None. Results are written to ``self.transaction.lmd`` / ``hmd``.
        """
        training_dataframe, model_definition = self._create_ludwig_dataframe(
            'train')

        # Each ``model_order_by`` entry is indexable; its first element is
        # the column name.
        order_by = self.transaction.lmd['model_order_by']
        if order_by is None:
            timeseries_cols = []
        else:
            timeseries_cols = [entry[0] for entry in order_by]

        if timeseries_cols:
            training_dataframe, model_definition = self._translate_df_to_timeseries_format(
                training_dataframe, model_definition, timeseries_cols, 'train')

        with disable_ludwig_output():

            # Always constructed, as before, even when a saved model replaces
            # it just below.
            model = LudwigModel(model_definition)

            # Online training (initialize_model + train_online) is not used:
            # the model can't be initialized without train_set_metadata, and
            # that can't be obtained without running train. See
            # https://github.com/uber/ludwig/issues/295 for updates. An
            # attempt at calling train_online on a loaded model returned
            # ``None`` as train_stats.

            if self.transaction.lmd['rebuild_model'] is not True:
                # Continue from the previously saved model.
                model = LudwigModel.load(
                    self.transaction.lmd['ludwig_data']['ludwig_save_path'])

            train_stats = model.train(
                data_df=training_dataframe,
                model_name=self.transaction.lmd['name'],
                skip_save_model=True)

            accuracy_history = self.transaction.lmd['model_accuracy']
            for k in train_stats['train']:
                if k not in accuracy_history['train']:
                    # NOTE(review): a key seen for the first time only gets
                    # empty histories; this run's accuracy is not appended.
                    # Kept as in the original -- confirm this is intended.
                    accuracy_history['train'][k] = []
                    accuracy_history['test'][k] = []
                elif k == 'combined':
                    # Compared by equality: the original ``is not 'combined'``
                    # was an identity test against a string literal, which
                    # does not reliably match equal strings.
                    accuracy_history['train'][k].extend(
                        train_stats['train'][k]['accuracy'])
                    accuracy_history['test'][k].extend(
                        train_stats['test'][k]['accuracy'])
                # Other keys: we should be adding the accuracy here but we
                # only have it for combined, so, for now use that; it only
                # affects multi-output scenarios anyway.

        # Remove the trailing 'local_jsondb_store' component, if present.
        # ``str.rstrip`` takes a set of characters rather than a suffix, so
        # it could also strip unrelated trailing characters from the path.
        store_suffix = 'local_jsondb_store'
        storage_root = Config.LOCALSTORE_PATH
        if storage_root.endswith(store_suffix):
            storage_root = storage_root[:-len(store_suffix)]
        ludwig_model_savepath = storage_root + self.transaction.lmd['name']

        model.save(ludwig_model_savepath)
        model.close()

        self.transaction.lmd['ludwig_data'] = {
            'ludwig_save_path': ludwig_model_savepath
        }
        self.transaction.hmd['ludwig_data'] = {
            'model_definition': model_definition
        }
from ludwig import LudwigModel
import pandas as pd

# Load the dataset and show its first rows.
df = pd.read_csv('Tweets.csv')
print(df.head())

# One text input feature and one category output feature.
model_definition = {
    'input_features': [
        {'name': 'text', 'type': 'text'},
    ],
    'output_features': [
        {'name': 'airline_sentiment', 'type': 'category'},
    ],
}

print('creating model')
model = LudwigModel(model_definition)
try:
    print('training model')
    train_stats = model.train(data_df=df)
finally:
    # Close the model even if training raises.
    model.close()
Beispiel #6
0
        'name': 'oldpeak',
        'type': 'numerical',
        'encoder': 'rnn'
    }, {
        'name': 'slope',
        'type': 'category',
        'encoder': 'rnn'
    }, {
        'name': 'ca',
        'type': 'category',
        'encoder': 'rnn'
    }, {
        'name': 'thal',
        'type': 'category',
        'encoder': 'rnn'
    }],
    'output_features': [{
        'name': 'target',
        'type': 'binary'
    }],
    'training': {
        'epochs': 10
    }
}
# Build the model from the definition above, then train and predict on the
# same ``data`` object.
model = LudwigModel(model_definition)
try:
    train_stats = model.train(data)

    # obtain predictions
    predictions = model.predict(data)
finally:
    # Close the model even if training or prediction raises.
    model.close()