def get(self):
    """Serve the stream database and its last update time as JSON.

    Both values are looked up in memcache first. On a cache miss a warning
    is logged, the value is read from the datastore and written back to
    memcache. The response body is the JSON form of
    ``{'streams': <database>, 'last_update': <last update time>}``.
    """
    increment_hit_counter(datastore_key_hits_streams_json)

    # Stream database: cache first, datastore as fallback.
    streams_db = memcache.get(memcache_key_database)
    if streams_db is None:
        logger.warn(
            'memcache failed on key: {}'.format(memcache_key_database))
        stored_db = ndb_get_entity(JsonDatabase, datastore_key_database)
        streams_db = utils.json_to_dict(stored_db.value)
        memcache.set(memcache_key_database, streams_db)

    # Last update time: same cache-then-datastore lookup.
    updated_at = memcache.get(memcache_key_last_update)
    if updated_at is None:
        logger.warn(
            'memcache failed on key: {}'.format(memcache_key_last_update))
        stored_time = ndb_get_entity(Time, datastore_key_last_update)
        updated_at = stored_time.value
        memcache.set(memcache_key_last_update, updated_at)

    payload = utils.dict_to_json({
        'streams': streams_db,
        'last_update': updated_at,
    })
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(payload)
def read_spec(spec, spec_dir):
    """Load the specification named ``spec`` from ``spec_dir``.

    The file looked up is ``<spec_dir>/<spec>.json``. An ``AssertionError``
    is raised when that path does not exist; otherwise the path is handed
    to ``utils.json_to_dict`` and its result is returned.
    """
    spec_path = os.path.join(spec_dir, "{}.json".format(spec))
    missing_msg = "Specification file '{}' does not exist".format(spec_path)
    assert os.path.exists(spec_path), missing_msg
    return utils.json_to_dict(spec_path)
def backup_database(backup_key):
    """Store a JSON copy of the current database under ``backup_key``.

    The database is taken from memcache; on a cache miss a warning is
    logged, the database is loaded from the datastore and memcache is
    repopulated. The database is then serialised and written to the
    datastore as a ``JsonDatabase`` value keyed by ``backup_key``.
    """
    current_db = memcache.get(memcache_key_database)
    if current_db is None:
        logger.warn('memcache failed on key: {}'.format(memcache_key_database))
        stored_db = ndb_get_entity(JsonDatabase, datastore_key_database)
        current_db = utils.json_to_dict(stored_db.value)
        memcache.set(memcache_key_database, current_db)

    serialized = utils.dict_to_json(current_db)
    logger.info('Backup database to key: {}'.format(backup_key))
    ndb_set_value(JsonDatabase, backup_key, serialized)
def update_database():
    """Refresh the stored database from the currently available streams.

    Reads the database from the datastore, merges in the result of
    ``streams.get_current_streams()`` via ``streams.update_database``,
    writes the merged database back to the datastore and replaces the
    memcache entry with it.
    """
    stored_db = ndb_get_entity(JsonDatabase, datastore_key_database)
    previous_db = utils.json_to_dict(stored_db.value)

    live_streams = streams.get_current_streams()
    refreshed_db = streams.update_database(previous_db, live_streams)

    ndb_set_value(JsonDatabase, datastore_key_database,
                  utils.dict_to_json(refreshed_db))

    # Drop the stale cache entry before storing the refreshed database.
    memcache.delete(memcache_key_database)
    memcache.set(memcache_key_database, refreshed_db)
def edit_database(db_json):
    """Replace the stored database with ``db_json`` after backing it up.

    ``db_json`` is parsed first, so invalid input fails before anything is
    written. The current database is then backed up under a key of the form
    ``backup_edit_<UTC timestamp>``, ``db_json`` is written to the
    datastore and the memcache entry is replaced with the parsed database.
    """
    edited_db = utils.json_to_dict(db_json)

    timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%d_%H-%M_%S')
    backup_database('backup_edit_{}'.format(timestamp))

    ndb_set_value(JsonDatabase, datastore_key_database, db_json)

    # Drop the stale cache entry before storing the edited database.
    memcache.delete(memcache_key_database)
    memcache.set(memcache_key_database, edited_db)
def get_data(infile, x_params, y_params, sport_types):
    """Collect (x, y) points per plot series from a workout file.

    Series ``i`` is described by ``x_params[i]``, ``y_params[i]`` and
    ``sport_types[i]``. Every line of ``infile`` is one workout (JSON); a
    workout contributes one point to series ``i`` when its ``"sport"``
    equals ``sport_types[i]`` and it contains both parameters. Workouts
    missing either parameter are skipped for that series.

    infile      -- path to a ``.gz`` or ``.txt`` file, one workout per line
    x_params    -- list of parameter names for the x axis
    y_params    -- list of parameter names for the y axis
    sport_types -- list of sport names, one per series

    The three lists must be non-empty and of equal length (asserted).

    Returns a list of ``DataForPlot`` objects, one per series.
    Raises ``Exception`` when the file extension is neither ``.gz`` nor
    ``.txt``.
    """
    assert len(x_params) == len(y_params)
    assert len(x_params) == len(sport_types)
    assert len(x_params) > 0
    assert len(y_params) > 0
    n_params = len(x_params)

    # One data object per (x parameter, y parameter, sport) series.
    objs = [
        DataForPlot(xparam=x_params[i], yparam=y_params[i],
                    sport=sport_types[i])
        for i in range(n_params)
    ]

    print("X parameters : " + str(x_params))
    print("Y parameters : " + str(y_params))
    print("Sports : " + str(sport_types))

    infile_basename, ext = os.path.splitext(infile)
    if ext == ".gz":
        f = gzip.open(infile)
    elif ext == ".txt":
        f = open(infile)
    else:
        raise Exception("File format not recognized")

    # try/finally so the file is closed even if a line fails to parse.
    try:
        nw = 0
        for line in f:
            # each line is a workout
            w = utils.json_to_dict(line.strip())
            sport = w["sport"]
            for i in range(n_params):
                if sport != sport_types[i]:
                    continue
                xp = x_params[i]  # x axis parameter
                yp = y_params[i]  # y axis parameter
                if xp in w and yp in w:
                    objs[i].add_point(w[xp], w[yp])
            nw += 1
            if nw % 100000 == 0:
                print("Done processing %s workouts" % (nw))
    finally:
        f.close()
    return objs
def condense_and_clean_data(infile, outfile):
    """Write a condensed, cleaned copy of a gzipped workout file.

    infile must be a .gz file generated by the sql_to_json_parser.py;
    outfile is written as gzip with one JSON workout per line.

    For every workout:
      - list values (trace data) are replaced by their mean, rounded to 6
        decimals and stored under the key ``<name>(avg)``
      - other values are converted with ``ParamFormatter.to_number``
        (for example '2.35 mi' becomes 2.35); values or parameters the
        formatter rejects are dropped and counted

    Progress and a summary of ignored parameters/values are printed.
    """
    t1 = time.time()
    precision = 6  # 6 digits after decimal
    n = 0
    n_params_ignored = 0
    n_values_ignored = 0
    ignored_params = set()
    ignored_values = set()

    # Context managers close both files even if a workout fails to convert.
    with gzip.open(outfile, "w") as fo, gzip.open(infile) as fi:
        param_formatter = ParamFormatter(precision=precision)
        for line in fi:
            d = {}
            w = utils.json_to_dict(line.strip())
            for k, v in w.items():
                if isinstance(v, list):
                    # replace trace data by averages
                    v = round(
                        numpy.mean(utils.remove_null_values_single(v)),
                        precision)
                    d[k + "(avg)"] = v
                else:
                    # convert and replace units - for example, convert
                    # '2.35 mi' to 2.35
                    try:
                        d[k] = param_formatter.to_number(k, v)
                    except InvalidValueException as e:
                        n_values_ignored += 1
                        ignored_values.add(e.value)
                    except InvalidParamException as e:
                        n_params_ignored += 1
                        ignored_params.add(e.param)
            fo.write(utils.dict_to_json(d) + "\n")
            n += 1
            if n % 10000 == 0:
                print("Written %d workouts.." % (n))

    t2 = time.time()
    print("Time taken = " + str(t2 - t1) + " seconds")
    print("%d params ignored" % (n_params_ignored))
    print("List of ignored parameters : " + str(ignored_params))
    print("%d values ignored" % (n_values_ignored))
    print("List of ignored values : " + str(ignored_values))
    print("Total %d workouts written" % (n))
def get_stats(infile):
    """Print workout counts per parameter, per user and per sport.

    ``infile`` is a gzipped file with one JSON workout per line. Every
    workout must contain ``"user_id"``; ``"sport"`` is optional. After the
    three tables, the total number of users is printed, followed by the
    number of users with more than 10, 40, 70, ... workouts.
    """
    workouts_for_param = {}
    workouts_for_sport = {}
    workouts_for_user = {}

    with gzip.open(infile) as f:
        nlines = 0
        for line in f:
            d = utils.json_to_dict(line)

            # workouts per param
            for k in d:
                workouts_for_param[k] = workouts_for_param.get(k, 0) + 1

            # workouts per sport type
            if "sport" in d:
                sport = d["sport"]
                workouts_for_sport[sport] = (
                    workouts_for_sport.get(sport, 0) + 1)

            # workouts per user
            user = d["user_id"]
            workouts_for_user[user] = workouts_for_user.get(user, 0) + 1

            nlines += 1
            if nlines % 100000 == 0:
                print("Done with %d workouts.." % (nlines))

    # print stats
    print_stats(workouts_for_param, 100, "Parameter", "# Workouts")
    print_stats(workouts_for_user, 100, "User ID", "# Workouts")
    print_stats(workouts_for_sport, 100, "Sport", "# Workouts")

    n_users = len(workouts_for_user)
    print("Total number of users :  %d" % (n_users))

    # Ascending workout counts, one entry per user.
    d_vals = sorted(workouts_for_user.values())
    for i in range(10, 400, 30):
        # side='right' yields the number of users with <= i workouts, so
        # the difference is the number with strictly more than i. The
        # default side='left' would also count users with exactly i.
        n_at_most = np.searchsorted(d_vals, i, side='right')
        print("Number of users with more than %d workouts : %d"
              % (i, n_users - n_at_most))
def get(self):
    """Serve the afreeca streams in Snipealot JSON formatting.

    The database is read from memcache, falling back to the datastore (and
    repopulating memcache) on a miss. Every database entry whose type is
    ``'afreeca'`` becomes one ``"<stream id>": [ "<nickname>", "<race>" ]``
    line, sorted by stream id. With no afreeca entries the response is an
    empty JSON object.
    """
    increment_hit_counter(datastore_key_hits_streams_json)

    # Get database
    db = memcache.get(memcache_key_database)
    if db is None:
        logger.warn(
            'memcache failed on key: {}'.format(memcache_key_database))
        db_json = ndb_get_entity(JsonDatabase, datastore_key_database).value
        db = utils.json_to_dict(db_json)
        memcache.set(memcache_key_database, db)

    # Get last update time. The value is not part of the response; the
    # lookup is kept because it repopulates memcache on a miss.
    last_update_time = memcache.get(memcache_key_last_update)
    if last_update_time is None:
        logger.warn(
            'memcache failed on key: {}'.format(memcache_key_last_update))
        last_update_time = ndb_get_entity(Time,
                                          datastore_key_last_update).value
        memcache.set(memcache_key_last_update, last_update_time)

    json_obj = dict()
    for key, value in db.items():
        stream_type, stream_id = streams.database_type_and_id(key)
        if stream_type != 'afreeca':
            continue
        race = value['game_info']['race']
        nickname = value['nickname']
        json_obj[stream_id] = [nickname, race]

    # Output in Snipealot formatting. Joining the entries (instead of
    # appending ',\n' and slicing it off the end) keeps the opening brace
    # when there are no entries, so the body is always valid JSON.
    # NOTE(review): values are interpolated without JSON escaping, as
    # before; a nickname containing '"' or '\' would break the output.
    entries = [
        ' "{}": [ "{}", "{}" ]'.format(key, value[0], value[1])
        for key, value in sorted(json_obj.items())
    ]
    json_str = '{\n' + ',\n'.join(entries) + '\n}\n'

    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json_str)
def read_data_as_lists(infile, sport, params, min_distance=1.0,
                       max_distance=100.0, min_data_points=200,
                       min_duration=100.0, max_duration=172800.0):
    """Read the workouts of one sport as lists of parameter values.

    ``infile`` is a gzipped file with one JSON workout per line. A workout
    is returned as ``[d[k] for k in params]`` when all of these hold:
      - its ``"sport"`` equals ``sport``
      - its ``"Distance"`` lies within [min_distance, max_distance] and its
        ``"Duration"`` within [min_duration, max_duration] (a missing or
        unparsable value counts as -inf and is therefore rejected)
      - ``"hr"``, when present, has at least ``min_data_points`` entries
      - it contains every key in ``params``

    The default ``min_duration`` is 100 s. Counters for the rejected
    workouts are printed at the end.

    Returns the list of accepted workouts (a list of lists).
    """
    print("Infile :  %s" % (infile,))
    print("params :  %s" % (params,))
    sport_missing = 0
    param_missing = 0
    n_ignore = 0
    n = 0
    data = []
    formatter = ParamFormatter()

    with gzip.open(infile) as f:
        for line in f:
            example = []
            # Cheap substring checks on the raw line avoid parsing JSON
            # for workouts that cannot match.
            # NOTE(review): this pre-filter looks for lower-case
            # "distance"/"duration" while the parsed keys used below are
            # "Distance"/"Duration" - confirm against the data format.
            if sport not in line:
                ignore = True
                sport_missing += 1
            elif ("hr" not in line or "distance" not in line
                  or "duration" not in line):
                ignore = True
                param_missing += 1
            else:
                d = utils.json_to_dict(line)
                ignore = False
                distance = float("-inf")
                duration = float("-inf")
                # Best effort: an unparsable value keeps the -inf default
                # and the workout is rejected by the range check below.
                # `except Exception` (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                if "Distance" in d:
                    try:
                        distance = formatter.to_number(
                            "Distance", d["Distance"])
                    except Exception:
                        pass
                if "Duration" in d:
                    try:
                        duration = formatter.to_number(
                            "Duration", d["Duration"])
                    except Exception:
                        pass

                if d["sport"] != sport:
                    ignore = True
                    sport_missing += 1
                elif (distance < min_distance
                      or duration < min_duration
                      or ("hr" in d and len(d["hr"]) < min_data_points)
                      or duration > max_duration
                      or distance > max_distance):
                    ignore = True
                else:
                    for k in params:
                        if k not in d:
                            param_missing += 1
                            ignore = True
                            break
                        example.append(d[k])

            if ignore:
                n_ignore += 1
            else:
                data.append(example)
            n += 1
            if n % 100000 == 0:
                print("%d workouts read.." % (n))

    print("%d workouts did not match the sport" % (sport_missing))
    print("%d workouts did not contain one or more parameters"
          % (param_missing))
    print("%d workouts ignored.." % (n_ignore))
    print("%d workouts successfully returned.." % (len(data)))
    return data
def generate_seed(project, bugnumber, output):
    """
    generates a file that contains json info to run d4j and lithium

    project   -- one of Chart, Lang, Closure, Math, Mockito, Time
    bugnumber -- comma-separated bug numbers as a string; '0' selects
                 every report found in the project's data directory
    output    -- path of the seed file to write

    For every selected bug, the ranked classes of its JSON report (at most
    ``max_files_per_bug``, a module-level setting) are joined with commas,
    and one line ``project bug testcase classes expected_msg_path`` is
    written per ``---`` line found in ``oracle/<project>/<bug>``.

    Raises ``Exception`` for an unknown project, a missing data directory
    or missing report files (the latter two also print "FAILED"), and
    ``IndexError`` when a report has no ranked classes.
    """
    initial_projects = ["Chart", "Lang", "Closure", "Math", "Mockito", "Time"]
    if project not in initial_projects:
        raise Exception("Project {} invalid. Please select one of {}".format(
            project, initial_projects))

    project_path = os.path.join(os.getcwd(), "data", project)
    if not os.path.isdir(project_path):
        print("FAILED")  # should print to stop the main script
        raise Exception("Project {} directory not found".format(project_path))

    source_paths = {}  # source path per project variant, looked up once

    def source_path_for(bug_id):
        # Solves the issue of different source paths for the same project.
        # Chosen per bug, so comma-separated input and '0' (all bugs) get
        # the right path for each report instead of failing in int().
        try:
            number = int(bug_id)
        except ValueError:
            number = None  # not a numbered report: use the default path
        variant = project
        if number is not None:
            if project == 'Lang' and number < 36:
                variant = project + '2'
            elif project == 'Math' and number > 84:
                variant = project + '2'
        if variant not in source_paths:
            source_paths[variant] = get_source_path(variant)
        return source_paths[variant]

    # get only bugs choosen by user
    bugnumber = bugnumber.split(",")
    if not is_input_number_valid(bugnumber, project_path):
        print("FAILED")  # should print to stop the main script
        raise Exception(
            "one or more json files({}) are not found in path {}".format(
                bugnumber, project_path))

    if '0' in bugnumber:  # 0 similar to "all" bugs
        bugnumbers = os.listdir(project_path)
    else:
        wanted = set('{}.json'.format(bug) for bug in bugnumber)
        bugnumbers = [
            doc for doc in os.listdir(project_path) if doc in wanted
        ]

    with open(output, "w") as seed_file:
        # for each bug
        for bug in bugnumbers:
            data = json_to_dict(os.path.join(project_path, bug))
            bug_number = bug.replace(".json", "")
            source_path = source_path_for(bug_number)

            # get rankings from morpho's report
            classes = []
            for item in data["rankings"]:
                java_file = os.path.join(source_path, item["class"])
                if java_file not in classes:
                    classes.append(java_file)
                if len(classes) == max_files_per_bug:
                    break  # get the top-k classes
            if not classes:
                raise IndexError(
                    "No ranked classes found for bug {}".format(bug_number))
            # converts [classA, classB] to classA,classB
            classes = ",".join(classes)

            # The oracle directory is named after the project argument;
            # the free name `project_name` previously used here is not
            # defined in this function.
            expected_dir = 'oracle/' + project + '/'
            expected_msg_path = expected_dir + bug_number
            with open(expected_msg_path) as expected_file:
                failing = expected_file.readlines()

            # get only the lines naming a failing test case
            for line in failing:
                if '---' in line:
                    testcase = line.strip().split(' ')[1]
                    seed_file.write("{} {} {} {} {}\n".format(
                        project, bug_number, testcase, classes,
                        expected_msg_path))
import logging
import os
import sys
from utils import json_to_dict
from mongoengine import connect


def healthcheck(db_client):
    """Exit with status 0 if the database answers, 1 otherwise.

    Sends the ``ismaster`` admin command through ``db_client``. Any
    exception is logged with its traceback and turns into exit status 1.
    This function never returns; it always terminates the process.
    """
    try:
        db_client.admin.command('ismaster')
    except Exception:
        logging.exception('Error while checking health')
        # sys.exit rather than the builtin exit(): the latter is injected
        # by the site module and is not guaranteed to exist.
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    logging.basicConfig()
    # Config path comes from CONFIG_PATH, with a repository default.
    config = json_to_dict(os.getenv('CONFIG_PATH', 'config/karmaconf.json'))
    db_config = config['MONGO']
    connection = connect(**db_config)
    healthcheck(connection)
def main():
    """Generate one test config per group from the runs of a wandb sweep.

    Runs are grouped by the values of the ``--grouping`` parameters. Within
    each group the run with the highest ``log_likelihood`` summary value is
    selected; runs without that value, or with a string value, are skipped.
    For each selected run a copy of the base config is extended with the
    run's dataset, model, run id (``restore``), a plot prefix and the
    requested extra/grouping fields, and written to
    ``<out_dir>/<group>.json``.
    """
    parser = argparse.ArgumentParser(description='Sweep test config generator')
    parser.add_argument("--id", type=str, help="id of wandb sweep")
    parser.add_argument("--base_config",
                        type=str,
                        help="Base config file for test setup")
    parser.add_argument(
        "--fields",
        type=str,
        default="",
        help="Fields to carry over from training config to test config")
    parser.add_argument(
        "--grouping",
        type=str,
        default="dataset,cgan_type",
        help=
        "Parameters to group by, only the best (in validation) model is tested"
    )
    parser.add_argument("--out_dir",
                        type=str,
                        help="Config file output directory")
    args = parser.parse_args()

    # parser.error prints usage and exits with status 2. Unlike assert it
    # is not stripped when Python runs with -O.
    if not args.id:
        parser.error("Must specify id")
    if not args.out_dir:
        parser.error("Must specify output dir")
    if not args.base_config:
        parser.error("Must specify base test config")

    base_config = utils.json_to_dict(args.base_config)
    extra_fields = args.fields.split(",") if args.fields else []
    groupings = args.grouping.split(",")

    runs = utils.get_sweep_runs(args.id)

    # Groups are mapped using a string: grouping1_grouping2_grouping3 etc.
    best_in_groups = {}  # Dict of group keys to (ll, run)
    for run in runs:
        # Exclude crashed runs
        if "log_likelihood" not in run.summary:
            continue
        ll = run.summary["log_likelihood"]
        # Check for NaN or -inf: such values show up as strings here
        if isinstance(ll, str):
            continue
        key = "_".join(str(run.config[g]) for g in groupings)
        if key not in best_in_groups or ll > best_in_groups[key][0]:
            best_in_groups[key] = (ll, run)

    for grouping, (ll, run) in best_in_groups.items():
        # Create configs. Work on a copy so the fields of one group never
        # end up in the base config or in another group's file.
        config = dict(base_config)

        # Always carry over dataset and model
        config["dataset"] = run.config["dataset"]
        config["model"] = run.config["model"]

        file_name = grouping.replace("/", "_")  # Need to clean options with a /
        config["restore"] = run.id
        config["plot_prefix"] = "{}_".format(file_name)

        for field in extra_fields + groupings:
            config[field] = run.config[field]

        config_path = os.path.join(args.out_dir, "{}.json".format(file_name))
        with open(config_path, 'w') as fp:
            json.dump(config, fp, indent=0)
        print("Created config for {}".format(grouping))

    print("done")
def test_get_database(json_file):
    """Return the database stored as JSON text in ``json_file``.

    The whole file is read and parsed with ``utils.json_to_dict``.
    """
    with open(json_file, 'r') as handle:
        contents = handle.read()
    return utils.json_to_dict(contents)
def _parse_list_option(opt_value):
    """Turn a list-valued option into a list of floats (or of float lists)."""
    if isinstance(opt_value, (list, tuple)):
        # Already structured, e.g. given as a JSON array in a config file:
        # only normalise the entries to floats.
        return [
            [float(e) for e in v] if isinstance(v, (list, tuple)) else float(v)
            for v in opt_value
        ]
    if "(" in opt_value:
        # entries are tuples (e.g. multi-dimensional x)
        # extra "," to always get a tuple of tuples
        parsed = ast.literal_eval(opt_value + ",")
        # Make into list of floats
        return [[float(e) for e in v] for v in parsed]
    # entries are single floats
    return [float(s) for s in opt_value.split(",")]


def get_config():
    """Build the run configuration from the command line and a config file.

    Command-line options are parsed first. When ``--config`` is given, the
    JSON file is read and its entries override the command-line values;
    every key in the file must be a known option. ``dataset`` and ``model``
    must be set and known. List-valued options (``plot_pdf``, ``mmd_scales``
    etc.) are converted from ``"1,5,10"`` to a list of floats, or from
    ``"(1,2),(3,4)"`` to a list of float lists; values already given as
    lists are only converted to floats.

    Returns the configuration as a dict.
    Raises ``AssertionError`` when a validation fails.
    """
    parser = argparse.ArgumentParser(description='Train model')
    # If config file should be used
    parser.add_argument("--config",
                        type=str,
                        help="Config file to read run config from")

    # General
    parser.add_argument("--dataset", type=str, help="Which dataset to use")
    parser.add_argument("--model",
                        type=str,
                        help="Which type of model to use")
    parser.add_argument(
        "--test",
        type=int,
        default=0,
        help="If model should be tested (at the end of possible training)")
    parser.add_argument("--train",
                        type=int,
                        default=1,
                        help="If model should be trained")
    parser.add_argument("--name", type=str, help="Name of the run for WandB")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="Seed for random number generator")
    parser.add_argument("--cpu",
                        type=int,
                        default=0,
                        help="Force to run on CPU")

    # Evaluation
    parser.add_argument("--test_runs",
                        type=int,
                        default=10,
                        help="Testing runs to average score for")
    parser.add_argument(
        "--restore",
        type=str,
        help="WandB run_id to restore parameters from (requires wandb logging)"
    )
    parser.add_argument("--restore_file",
                        type=str,
                        help="Path to file to restore parameters from")
    parser.add_argument("--eval_div",
                        type=str,
                        help="Evaluate model by estimating a divergence")
    parser.add_argument(
        "--eval_cgan",
        type=str,
        help="CGAN (network architecture) to use for evaluation")

    # Plotting
    parser.add_argument(
        "--scatter",
        type=int,
        default=0,
        help="If scatter-plots should be created during validation/testing")
    parser.add_argument(
        "--cond_scatter",
        type=str,
        help="Create scatter plot for conditional distribution at given x:s")
    parser.add_argument(
        "--plot_pdf",
        type=str,
        help="List of x-values to plot pdf at during validation/testing")
    parser.add_argument(
        "--plot_pdf_index",
        type=str,
        help="List of test/validation set indexes to plot pdf for")
    parser.add_argument(
        "--plot_functions",
        type=int,
        default=0,
        help=
        "Plot some sampled functions by varying x and keeping noise constant")
    parser.add_argument("--plot_gt",
                        type=int,
                        default=0,
                        help="Plot ground truth only, instead of model")
    parser.add_argument("--plot_prefix",
                        type=str,
                        help="Prefix to be prepended to plot file names")
    parser.add_argument(
        "--cond_plot_trajectories",
        type=str,
        help="""(For trajectories datasets) Plot 2D trajectory samples.
            If an index is given, plots trajectories for corresponding test sample.
            If a tuple is given, trajectories are conditioned on the tuple as x-value.
            """)
    parser.add_argument("--plot_trajectories",
                        type=int,
                        default=20,
                        help="Amount of trajectories to plot.")

    # Batched training models (i.e. neural network based)
    parser.add_argument("--epochs",
                        type=int,
                        help="How many epochs to train for",
                        default=10)
    parser.add_argument("--val_interval",
                        type=int,
                        default=10,
                        help="Evaluate model every eval_interval:th epoch")
    parser.add_argument("--batch_size",
                        type=int,
                        help="Batch size for training",
                        default=128)
    parser.add_argument(
        "--eval_batch_size",
        type=int,
        help="Batch size to use outside training, in validation etc.",
        default=1000)
    parser.add_argument("--lr", type=float, help="Learning rate", default=1e-3)
    parser.add_argument("--lr_decay",
                        type=float,
                        help="Multiplicative learning rate decay",
                        default=1.0)
    parser.add_argument("--optimizer",
                        type=str,
                        help="Optimizer to use for training",
                        default="rmsprop")

    # KDE
    parser.add_argument(
        "--kernel_scales",
        type=int,
        default=50,
        help="Amount of kernel scale parameters in KDE to try for validation")
    parser.add_argument(
        "--kernel_scale_min",
        type=float,
        default=0.001,
        help="Lower bound of allowed kernel scale range for KDE")
    parser.add_argument(
        "--kernel_scale_max",
        type=float,
        default=0.5,
        help="Upper bound of allowed kernel scale range for KDE")
    parser.add_argument(
        "--eval_samples",
        type=int,
        default=200,
        help="How many samples to draw for estimating KDE in evaluation")
    parser.add_argument("--kde_val",
                        type=int,
                        default=0,
                        help="Get KDE estimate also in validation.")
    parser.add_argument(
        "--kde_batch_size",
        type=int,
        default=10,
        help="How many kernels scales to compute KDE for at the same time")

    # CGAN
    parser.add_argument(
        "--cgan_nets",
        type=str,
        help="""Name of CGAN network specification,
            available specs can be found in cgan_specs directory.""")
    parser.add_argument("--cgan_type",
                        type=str,
                        default="standard",
                        help="""Version of CGAN training objective to use,
            see models/cgan_versions for a list""")
    parser.add_argument("--noise_dim",
                        type=int,
                        default=1,
                        help="Dimensionality of noise vector fed to generator")
    parser.add_argument("--noise_dist",
                        type=str,
                        default="gaussian",
                        help="Distribution to sample noise vector from")
    parser.add_argument("--gen_lr", type=float, help="Generator learning rate")
    parser.add_argument("--disc_lr",
                        type=float,
                        help="Discriminator learning rate")
    parser.add_argument(
        "--gen_lr_decay",
        type=float,
        help="Multiplicative learning rate decay for generator)")
    parser.add_argument(
        "--disc_lr_decay",
        type=float,
        help="Multiplicative learning rate decay for discriminator)")
    parser.add_argument("--gen_optimizer",
                        type=str,
                        help="Optimizer to use for generator training")
    parser.add_argument("--disc_optimizer",
                        type=str,
                        help="Optimizer to use for discriminator training")
    parser.add_argument(
        "--clip_grad",
        type=float,
        default=0.,
        help="Value to clip gradients at (clipping by norm). 0 is no clipping."
    )
    parser.add_argument(
        "--gen_samples",
        type=int,
        default=1,
        help=
        "How many generator samples to draw for each x in generator training")

    # GMMN (and CGMMN)
    parser.add_argument("--mmd_scales",
                        type=str,
                        default="1,5,10,20",
                        help="""Scale parameter to use in MMD-based loss
            (if specific values for x and y are not set)""")
    parser.add_argument("--mmd_scales_x",
                        type=str,
                        help="MMD scale parameter for kernel applied on x")
    parser.add_argument("--mmd_scales_y",
                        type=str,
                        help="MMD scale parameter for kernel applied on y")
    parser.add_argument(
        "--kernel_lr",
        type=float,
        default=0.01,
        help="(only GMMN) Learning rate for kernel parameter tuning")
    parser.add_argument(
        "--mmd_lambda",
        type=float,
        default=1.0,
        help=
        "(only CGMMN) Regularizer lambda to stabilize matrix inversions in MMD"
    )
    parser.add_argument("--sqrt_loss",
                        type=int,
                        default=1,
                        help="""(only CGMMN) Use square root of the loss,
            can yield better results, see Li et al.""")

    # NN-based models (mdn, nn_reg, nn_het, dctd, cgmmn, gmmn)
    parser.add_argument(
        "--network",
        type=str,
        help="""Name of network specification to use,
            available specs can be found in nn_specs directory.""")
    parser.add_argument(
        "--l2_reg",
        type=float,
        default=0.0,
        help="L2-regularization added to cost function (aka weight decay)")

    # MDN
    parser.add_argument("--mixture_comp",
                        type=int,
                        default=5,
                        help="Amount of mixture components in MDN")
    parser.add_argument(
        "--log_coefficients",
        type=int,
        default=0,
        help="If mixture coefficients should be logged to wandb")

    # GP
    parser.add_argument("--gp_kernel",
                        type=str,
                        default="rbf",
                        help="Which kernel type to use in GP")
    parser.add_argument(
        "--opt_restarts",
        type=int,
        default=0,
        help="Restarts in kernel hyperparameter optimization process")

    # DCTD
    parser.add_argument(
        "--imp_samples",
        type=int,
        default=500,
        help="Amount of importance samples used to estimate normalization Z")
    parser.add_argument(
        "--proposal_scales",
        type=str,
        default="0.5,1,5",
        help="Scales of gaussians in mixture proposal distribution")
    parser.add_argument(
        "--mode_find_steps",
        type=int,
        default=100,
        help=
        ("Amount of optimization steps in mode finding for DCTD proposal distribution"
         ))
    parser.add_argument(
        "--mode_find_lr",
        type=float,
        default=1e-2,
        help="Learning rate in mode finding for DCTD proposal distribution")
    parser.add_argument(
        "--plot_dctd_modes",
        type=int,
        default=0,
        help="Create additional scatter plot with modes of DCTD model")

    args = parser.parse_args()
    config = vars(args)

    # Read additional config from file
    if args.config:
        assert os.path.exists(args.config), "No config file: {}".format(
            args.config)
        config_from_file = utils.json_to_dict(args.config)

        # Make sure all options in config file also exist in argparse config.
        # Avoids choosing wrong parameters because of typos etc.
        unknown_options = set(config_from_file.keys()).difference(
            set(config.keys()))
        unknown_error = "\n".join([
            "Unknown option in config file: {}".format(opt)
            for opt in unknown_options
        ])
        assert (not unknown_options), unknown_error

        # Values from the file take precedence over the command line.
        config.update(config_from_file)

    assert config["dataset"], "No dataset specified"
    assert config["dataset"] in dataset_list.sets, (
        "Unknown dataset: {}".format(config["dataset"]))
    assert config["model"], "No model specified"
    assert config["model"] in models, "Unknown model '{}'".format(
        config["model"])

    # Options holding lists of numbers (or of tuples of numbers)
    for split_option in [
            "plot_pdf",
            "plot_pdf_index",
            "cond_scatter",
            "mmd_scales",
            "mmd_scales_x",
            "mmd_scales_y",
            "proposal_scales",
            "cond_plot_trajectories",
    ]:
        opt_value = config[split_option]
        if opt_value:
            config[split_option] = _parse_list_option(opt_value)

    return config
import gzip
import utils
import sys

# Reports duplicated workout ids in the gzipped workout file given as the
# first command-line argument (one JSON workout per line).
wids = set()  # workout ids seen so far

with gzip.open(sys.argv[1]) as f:
    n = 0
    for n, line in enumerate(f, 1):
        workout_id = utils.json_to_dict(line)["workout_id"]
        if workout_id in wids:
            print("DUPLICATE FOUND.. workout id = " + str(workout_id))
        wids.add(workout_id)
        # progress report
        if n % 100000 == 0:
            print("Done with %d workouts.." % (n))
def generate_seed(project, bugnumber):
    """
    generates a file that contains json info to run d4j and lithium

    project   -- one of Chart, Lang, Closure, Math, Mockito, Time
    bugnumber -- comma-separated bug numbers as a string; '0' selects
                 every report found in the project's data directory

    For every selected bug, ``run_input_test.sh`` is run against a fresh
    temporary directory and the lines of the ``failing_tests`` file it
    produces are written to ``oracle/<project>/<bug>``. When the script
    produces no such file, the oracle file is written empty.

    Raises ``Exception`` for an unknown project, a missing data directory
    or missing report files (the latter two also print "FAILED").
    """
    initial_projects = ["Chart", "Lang", "Closure", "Math", "Mockito", "Time"]
    if project not in initial_projects:
        raise Exception("Project {} invalid. Please select one of {}".format(
            project, initial_projects))

    project_path = os.path.join(os.getcwd(), "data", project)
    if not os.path.isdir(project_path):
        print("FAILED")  # should print to stop the main script
        raise Exception("Project {} directory not found".format(project_path))

    # Solves the issue of different source paths for the same project.
    # bugnumber may be a comma-separated list, which int() cannot parse;
    # in that case the default source path is used instead of failing.
    try:
        single_bug = int(bugnumber)
    except ValueError:
        single_bug = None
    if single_bug is not None and project == 'Lang' and single_bug < 36:
        source_variant = project + '2'
    elif single_bug is not None and project == 'Math' and single_bug > 84:
        source_variant = project + '2'
    else:
        source_variant = project
    # NOTE(review): the source path is not used further in this function;
    # the lookup is kept in case get_source_path validates the project -
    # confirm and remove if it is a pure lookup.
    source_path = get_source_path(source_variant)

    # get only bugs choosen by user
    bugnumber = bugnumber.split(",")
    if not is_input_number_valid(bugnumber, project_path):
        print("FAILED")  # should print to stop the main script
        raise Exception(
            "one or more json files({}) are not found in path {}".format(
                bugnumber, project_path))

    if '0' in bugnumber:  # 0 similar to "all" bugs
        bugnumbers = os.listdir(project_path)
    else:
        wanted = set('{}.json'.format(bug) for bug in bugnumber)
        bugnumbers = [
            doc for doc in os.listdir(project_path) if doc in wanted
        ]

    # for each bug
    for bug in bugnumbers:
        # NOTE(review): the parsed report is not used below; the call is
        # kept so an unreadable report still fails here as before.
        json_to_dict(os.path.join(project_path, bug))
        bug_number = bug.replace(".json", "")

        # getting the expected message. The oracle directory is named
        # after the project argument; the free name `project_name`
        # previously used here is not defined in this function.
        expected_dir = 'oracle/' + project + '/'
        if not os.path.exists(expected_dir):
            os.makedirs(expected_dir)
        expected_msg_path = expected_dir + bug_number

        project_dir = tempfile.mkdtemp(prefix="lithium-slicer_")
        output_filepath = project_dir + '/failing_tests'
        print('output_filepath=', output_filepath)

        runtest_script = "bash run_input_test.sh {PROJECTDIR} {PROJECT} {BUG}"
        cmd_str = runtest_script.format(PROJECTDIR=project_dir,
                                        PROJECT=project,
                                        BUG=bug_number + 'b')
        call_cmd(cmd_str)  # call shell script

        failing = ''
        if os.path.isfile(output_filepath):
            with open(output_filepath) as out_fail:
                failing = out_fail.readlines()

        with open(expected_msg_path, "w+") as expected:
            expected.write("{}".format(''.join(failing)))