def test_open(self):
    with open('LICENSE.txt') as f:
        data = f.readlines()
    self.assertListEqual(data, seq.open('LICENSE.txt').to_list())

    text = ''.join(data).split(',')
    self.assertListEqual(text, seq.open('LICENSE.txt', delimiter=',').to_list())

    with self.assertRaises(ValueError):
        seq.open('LICENSE.txt', mode='w').to_list()
def plot(filename):
    # Assumes: import numpy as np; from scipy import stats;
    # import matplotlib.pyplot as plt; from functional import seq, _
    # parse_line is defined elsewhere in the module.
    data = seq.open(filename).map(parse_line)
    bfs = data.filter(_.algorithm == 'bfs')
    dfs = data.filter(_.algorithm == 'dfs')

    # Edmonds-Karp (BFS): regress runtime against V * E^2
    x = np.array(bfs.map(lambda x: x.vertexes * x.edges * x.edges).list())
    y = np.array(bfs.map(_.runtime).list())
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.title('Numerical Performance of Edmonds-Karp')
    plt.xlabel('Input Size in VE^2')
    plt.ylabel('Running Time in Seconds')
    plt.scatter(x, y)
    plt.show()
    plt.clf()

    # Ford-Fulkerson (DFS): regress runtime against E * f, grouped by max flow f
    ff_data = dfs.map(lambda x: (x.flow, x.flow * x.edges, x.runtime)).group_by(_[0]).cache()
    plt.title('Numerical Performance of Ford-Fulkerson')
    plt.xlabel('Input Size in Ef')
    plt.ylabel('Running Time in Seconds')
    max_flow = ff_data.max_by(lambda kv: kv[0])[0]
    all_x = list()
    all_y = list()
    for k, v in ff_data:
        x = list(map(_[1], v))
        all_x.extend(x)
        y = list(map(_[2], v))
        all_y.extend(y)
        # Shade each group by its flow value, capped so points remain visible
        ratio = 1 - k / max_flow
        if ratio > .8:
            ratio = .8
        plt.scatter(x, y, color=str(ratio))
    x = np.array(all_x)
    y = np.array(all_y)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.show()
def test_seq_open(self):
    path = _make_tmp_file('''red
green
blue

''')
    res = seq.open(path)
    assert res == ['red\n', 'green\n', 'blue\n', '\n']
def load_meta(meta_file):
    def parse_line(line):
        tokens = line.split()
        question = int(tokens[0])
        sentence = int(tokens[1])
        token = int(tokens[2])
        guess = ' '.join(tokens[3:])
        return Meta(question, sentence, token, guess)

    return seq.open(meta_file).map(parse_line)
def load_predictions(pred_file):
    def parse_line(line):
        try:
            tokens = line.split()
            score = float(tokens[0])
            if len(tokens) < 2:
                question, sentence, token = None, None, None
            else:
                question, sentence, token = [int(x) for x in tokens[1].split('_')]
            return Prediction(score, question, sentence, token)
        except Exception:
            print("Error parsing line: {0}".format(line))
            raise

    return seq.open(pred_file).map(parse_line)
def load_predictions(pred_file: str) -> Sequence:
    def parse_line(line: str) -> Prediction:
        try:
            tokens = line.split()
            score = float(tokens[0])
            if len(tokens) < 2:
                question, sentence, token = None, None, None
            else:
                question, sentence, token = [
                    int(x) for x in tokens[1].split('_')
                ]
            return Prediction(score, question, sentence, token)
        except Exception:
            log.info("Error parsing line: {0}".format(line))
            raise

    return seq.open(pred_file).map(parse_line)
def logfile2blocks(self, path):
    # type: (Path) -> Iterable[str]
    return seq.open(path.as_posix(), encoding='utf8')
def _read_file(path):
    content = seq.open(path, delimiter='\n') \
        .map(lambda x: x.strip()) \
        .filter(lambda x: x != '') \
        .make_string('\n')
    return content
def test_open_gzip(self):
    # Read the uncompressed file as text so both sides compare str lines
    with open("functional/test/data/test.csv") as f:
        data = f.readlines()
    self.assertListEqual(
        data, seq.open('functional/test/data/test.csv.gz').to_list())