def train():
    """Train model 4: predict is_top_submission from the category feature alone.

    Reads batch size / epoch count from Settings, builds the model, prepares
    the data splits, and fits with logging/plotting/saving callbacks.
    """
    cfg = Settings()
    n_batch = cfg.get_training_parameters('batch_size')
    n_epochs = cfg.get_training_parameters('epochs')

    model = Model4Builder()()

    prep = Preprocessor(model)
    prep.load_data(['category', 'is_top_submission'])

    train_x = [prep.training_data['category']]
    val_x = [prep.validation_data['category']]
    train_y = [prep.training_data['is_top_submission']]
    val_y = [prep.validation_data['is_top_submission']]

    # Weight classes by their frequency in the training labels so the
    # imbalanced binary target does not dominate the loss.
    weights = calculate_class_weights(
        prep.training_data['is_top_submission'],
        [layer.name for layer in model.output_layers])

    callbacks = CallbackBuilder(
        model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(train_x,
              train_y,
              batch_size=n_batch,
              epochs=n_epochs,
              callbacks=callbacks,
              validation_data=(val_x, val_y),
              class_weight=weights)
def train():
    """Train the glove-based model: predict is_top_submission from headline,
    body_begin and category inputs.

    Loads the GloVe embedding, configures builder and preprocessor with the
    sequence-length parameters from Settings, then fits the model with the
    standard callback set and class weighting.
    """
    cfg = Settings()
    n_batch = cfg.get_training_parameters('batch_size')
    n_epochs = cfg.get_training_parameters('epochs')
    vocab_size = cfg.get_training_parameters('dictionary_size')
    headline_len = cfg.get_training_parameters('max_headline_length')
    body_len = cfg.get_training_parameters('body_begin_length')

    # Pre-trained word embedding shared by builder and preprocessor.
    glove = Glove(vocab_size)
    glove.load_embedding()

    builder = (Model234Builder()
               .set_input('glove', glove)
               .set_parameter('max_headline_length', headline_len)
               .set_parameter('body_begin_length', body_len))
    model = builder()

    prep = Preprocessor(model)
    prep.set_encoder('glove', glove)
    prep.set_parameter('max_headline_length', headline_len)
    prep.set_parameter('body_begin_length', body_len)
    prep.load_data(['headline', 'body_begin', 'category', 'is_top_submission'])

    input_keys = ['headline', 'body_begin', 'category']
    train_x = [prep.training_data[key] for key in input_keys]
    val_x = [prep.validation_data[key] for key in input_keys]
    train_y = [prep.training_data['is_top_submission']]
    val_y = [prep.validation_data['is_top_submission']]

    # Counter class imbalance in the binary target.
    weights = calculate_class_weights(
        prep.training_data['is_top_submission'],
        [layer.name for layer in model.output_layers])

    callbacks = CallbackBuilder(
        model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(train_x,
              train_y,
              batch_size=n_batch,
              epochs=n_epochs,
              callbacks=callbacks,
              validation_data=(val_x, val_y),
              class_weight=weights)
def train():
    """Train model 6: predict is_top_submission from headline and article
    log-length representations.

    Builds NumericLog encoders sized by the Settings length parameters, wires
    them into builder and preprocessor, then fits with the standard callbacks
    and class weighting.
    """
    cfg = Settings()
    n_batch = cfg.get_training_parameters('batch_size')
    n_epochs = cfg.get_training_parameters('epochs')
    headline_len = cfg.get_training_parameters('max_headline_length')
    article_len = cfg.get_training_parameters('max_article_length')

    headline_log = NumericLog(headline_len)
    article_log = NumericLog(article_len)

    builder = (Model6Builder()
               .set_input('headline_numeric_log', headline_log)
               .set_input('article_numeric_log', article_log))
    model = builder()

    prep = Preprocessor(model)
    prep.set_encoder('headline_numeric_log', headline_log)
    prep.set_encoder('article_numeric_log', article_log)
    prep.load_data([
        'headline_log_representation',
        'article_log_representation',
        'is_top_submission',
    ])

    train_x = [
        prep.training_data['headline_log_representation'],
        prep.training_data['article_log_representation'],
    ]
    val_x = [
        prep.validation_data['headline_log_representation'],
        prep.validation_data['article_log_representation'],
    ]
    train_y = [prep.training_data['is_top_submission']]
    val_y = [prep.validation_data['is_top_submission']]

    # Counter class imbalance in the binary target.
    weights = calculate_class_weights(
        prep.training_data['is_top_submission'],
        [layer.name for layer in model.output_layers])

    callbacks = CallbackBuilder(
        model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(train_x,
              train_y,
              batch_size=n_batch,
              epochs=n_epochs,
              callbacks=callbacks,
              validation_data=(val_x, val_y),
              class_weight=weights)
def calculate_correlations():
    """Print pairwise Pearson correlations between the rounded test-set
    predictions of up to seven trained models.

    Each model is loaded from the path given by its --model_N CLI flag; flags
    left unset are skipped. Every ordered pair of loaded models is printed
    (so each unordered pair appears twice, once in each direction).
    """
    parser = ArgumentParser()
    # One optional checkpoint path per model variant.
    for i in range(1, 8):
        parser.add_argument('--model_{}'.format(i), type=str)
    arguments = parser.parse_args()

    cfg = Settings()
    defaults = cfg.get_training_parameters()

    glove = Glove(defaults['dictionary_size'])
    glove.load_embedding()
    headline_log = NumericLog(defaults['max_headline_length'])
    article_log = NumericLog(defaults['max_article_length'])

    print('load data...')
    prep = Preprocessor(None)
    prep.set_encoder('glove', glove)
    prep.set_encoder('headline_numeric_log', headline_log)
    prep.set_encoder('article_numeric_log', article_log)
    prep.set_parameter('max_headline_length',
                       defaults['max_headline_length'])
    prep.set_parameter('body_begin_length', defaults['body_begin_length'])
    prep.load_data([
        'headline', 'body_begin', 'category', 'minute', 'hour', 'day_of_week',
        'day_of_year', 'headline_log_representation',
        'article_log_representation', 'competitive_score'
    ])

    # Custom metrics needed to deserialize the saved Keras models.
    custom_objects = {
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

    print('load models...')
    test = prep.test_data
    model_inputs = {
        'model_1': [test['headline']],
        'model_2': [test['headline']],
        'model_3': [test['body_begin']],
        'model_4': [test['category']],
        'model_5': [
            test[key]
            for key in ('minute', 'hour', 'day_of_week', 'day_of_year')
        ],
        'model_6': [
            test[key]
            for key in ('headline_log_representation',
                        'article_log_representation')
        ],
        'model_7': [test['competitive_score']],
    }

    print('predict...')
    predictions = {}
    for name in model_inputs:
        # argparse defines every --model_N attribute; unset flags are falsy.
        checkpoint = getattr(arguments, name, None)
        if checkpoint:
            model = load_model(checkpoint, custom_objects=custom_objects)
            predictions[name] = np.round(model.predict(model_inputs[name]))

    print('calculate correlation...')
    for name_a in predictions:
        for name_b in predictions:
            if name_a == name_b:
                continue
            # Correlate the last output column of each model's predictions;
            # row 0 of the 2x2 correlation matrix, entry 1 is the
            # cross-correlation coefficient.
            coeffs = np.corrcoef(predictions[name_a][:, -1],
                                 predictions[name_b][:, -1])[0]
            print(name_a, name_b, coeffs[1])