def item_mean(ctx, **params):
    from models import item_mean

    model = Model(**ctx.obj)
    model.train(
        # parse each input line into a typed record before handing it to the trainer
        load_fn=lambda path: map(
            lambda line: helpers.typed_line(line, parser=helpers.parse_normalized),
            helpers.read_lines(path, header=False)),
        train_fn=item_mean.train,
        predict_fn=item_mean.predict,
        store_fn=helpers.write_normalized,
        **params)
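
# The models.item_mean module is not shown in this snippet. Below is a minimal sketch
# of what per-item-mean train/predict functions consistent with the wiring above might
# look like; the (user, item, rating) record layout and the global-mean fallback are
# assumptions for illustration only, not the actual implementation.
from collections import defaultdict

def _item_mean_train_sketch(records):
    # accumulate per-item rating sums and counts, plus a global mean for unseen items
    sums, counts = defaultdict(float), defaultdict(int)
    total, n = 0.0, 0
    for user, item, rating in records:
        sums[item] += rating
        counts[item] += 1
        total += rating
        n += 1
    global_mean = total / n if n else 0.0
    means = {item: sums[item] / counts[item] for item in sums}
    return means, global_mean

def _item_mean_predict_sketch(model, user, item):
    # predict the item's mean rating, falling back to the global mean for unseen items
    means, global_mean = model
    return user, item, means.get(item, global_mean)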
def main():
    data = helpers.load_json("data/states.json")
    if not isinstance(data, dict):
        data = {x["full_name"]: x for x in data}

    key = "marriage_age"
    new = {}
    lines = helpers.read_lines("entry.txt")
    lines = [x for x in lines if x]
    # lines = lines[::4]

    for line in lines:
        # drop a leading "<rank>. " prefix if present
        line = line.split(". ")[-1]
        name, num = line.split(": ", 1)
        new[name] = float(num)
        try:
            # alternative tab-separated format: name (optional parenthetical) TAB number
            name = line.split("\t")[0]
            name = name.split("(")[0].strip()
            new[name] = float(line.split("\t")[1].replace(",", ""))
        except Exception:
            pass

    for k, v in new.items():
        print(k, ":", v)

    for name, val in new.items():
        if name not in data:
            data[name] = {}
        data[name][key] = val

    # Clean up the data
    cleaned = {}
    for k, v in data.items():
        clean_key = rmchars(k, ".")
        clean_key = clean_key.replace("Saint", "St")
        if clean_key in cleaned:
            cleaned[clean_key].update(v)
        else:
            cleaned[clean_key] = v
        cleaned[clean_key]["name"] = clean_key

    return helpers.dump_json(cleaned, "foo.json")
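
# rmchars is not defined in this snippet; a minimal sketch consistent with how it is
# called above (strip every character of `chars` from `s`) might look like this.
def rmchars(s, chars):
    return "".join(c for c in s if c not in chars)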
def train(self, train_class, **options):
    # load training dataset and train model
    training = SurpriseModel._load_data(self.training_path).build_full_trainset()
    algorithm = train_class(**options)
    algorithm.fit(training)

    def predict(user, item):
        prediction = algorithm.predict(user, item)
        return user, item, helpers.clip(prediction.est)

    # load prediction dataset and make predictions
    to_predict = map(lambda r: r.strip().split(',')[:2],
                     helpers.read_lines(self.predict_path, header=False))
    predictions = map(lambda pair: predict(*pair), to_predict)

    # write to output file
    helpers.write_normalized(self.output_path, predictions)
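
# A self-contained sketch of the same flow using scikit-surprise directly; SVD is one
# example of an algorithm class that could be passed as train_class. The file path,
# Reader line format, and rating scale are assumptions for illustration only.
from surprise import SVD, Dataset, Reader

reader = Reader(line_format="user item rating", sep=",", rating_scale=(1, 5))
data = Dataset.load_from_file("data/train.csv", reader=reader)
trainset = data.build_full_trainset()

algorithm = SVD(n_factors=100)
algorithm.fit(trainset)

# predict() takes raw user/item ids and returns a Prediction whose .est is the estimate
print(algorithm.predict("42", "1337").est)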
def get_submission_id_pairs(submission_prediction_files):
    # take the (user, item) id columns from the first submission file
    get_user_item_pair = lambda t: t.strip().split(',')[:2]
    f = submission_prediction_files[0]
    return map(get_user_item_pair, helpers.read_lines(f, header=False))
def load_ratings(path):
    # third CSV column holds the rating; np.float was removed from NumPy, so use the
    # builtin float (which maps to float64) as the dtype
    get_rating = lambda t: float(t.strip().split(',')[2])
    return np.fromiter(map(get_rating, helpers.read_lines(path, header=False)),
                       dtype=float)
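
# A hedged sketch of how the two helpers above might be combined to blend several
# submission files by simple averaging; the blending function itself, its name, and
# the output path are assumptions, while helpers.write_normalized is used the same way
# as in the surrounding code.
import numpy as np

def blend_submissions_sketch(submission_prediction_files, output_path):
    # id pairs are shared across files, ratings are averaged row by row
    id_pairs = list(get_submission_id_pairs(submission_prediction_files))
    ratings = np.stack([load_ratings(f) for f in submission_prediction_files])
    blended = ratings.mean(axis=0)
    rows = ((user, item, rating) for (user, item), rating in zip(id_pairs, blended))
    helpers.write_normalized(output_path, rows)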
model.add(Lambda(normalize, name='normalize'))
# convolutional feature extractor
model.add(Conv2D(24, 5, strides=2, activation='elu'))
model.add(Conv2D(36, 5, strides=2, activation='elu'))
model.add(Conv2D(48, 5, strides=2, activation='elu'))
model.add(Conv2D(64, 3, strides=1, activation='elu'))
model.add(Conv2D(64, 3, strides=1, activation='elu'))
model.add(Dropout(0.5))
model.add(Flatten())
# fully connected regression head predicting a single value
model.add(Dense(100, activation='elu'))
model.add(Dense(50, activation='elu'))
model.add(Dense(10, activation='elu'))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')

lines = helpers.read_lines(sys.argv[1])
lines = balance(lines)
lines = shuffle(lines)

train_samples, valid_samples = train_test_split(lines, test_size=0.2)
print('Training set: {}'.format(len(train_samples)))
print('Validation set: {}'.format(len(valid_samples)))

# Visualization
# plt.hist([t[1] for t in train_samples], bins=100)
# plt.show()

EPOCHS = 10
BATCH_SIZE = 32

train_generator = helpers.generator(train_samples, BATCH_SIZE)
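
# A hedged sketch of how training might continue from here; the validation generator,
# the batching behaviour of helpers.generator (assumed to yield batches indefinitely),
# and the saved model filename are assumptions rather than part of the code above.
valid_generator = helpers.generator(valid_samples, BATCH_SIZE)

# on older Keras versions this call would be model.fit_generator(...)
model.fit(train_generator,
          steps_per_epoch=len(train_samples) // BATCH_SIZE,
          validation_data=valid_generator,
          validation_steps=len(valid_samples) // BATCH_SIZE,
          epochs=EPOCHS)
model.save('model.h5')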
filter_fn = None
if sys.argv[1] == "simple":
    filter_fn = non_constant_filter
elif sys.argv[1] == "clear":
    filter_fn = clear_variation_filter
elif sys.argv[1] == "strong":
    filter_fn = strong_variation_filter
else:
    print("Unknown filter type: " + sys.argv[1])
    sys.exit(1)

# Filter loop
for name in sys.argv[2:]:
    lines = read_lines(name)

    # parse lines
    instr = [parse(l) for l in lines]

    # filter out instructions without given latencies or with constant latencies
    instr = [i for i in instr if filter_fn(i)]

    out_path = "{}/../filtered/{}-{}".format(
        Path(__file__).parent.absolute(), sys.argv[1], os.path.basename(name))
    with open(out_path, "w") as outfile:
        outfile.write('\n'.join(i[3] for i in instr))
        outfile.write('\n')
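
# A hedged sketch of what one of the filter predicates above might look like; the
# record layout returned by parse() is not shown here, so the index of the latency
# samples (record[2]) and the meaning of "non-constant" are illustrative assumptions.
def non_constant_filter_sketch(record):
    latencies = record[2]          # hypothetical position of the latency samples
    return bool(latencies) and len(set(latencies)) > 1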