def test_open(self):
        """Check ``seq.open`` against the builtin ``open`` on LICENSE.txt.

        Covers the default read, a read with a ``,`` delimiter, and the
        ``ValueError`` expected when the file is opened in write mode.
        """
        license_path = 'LICENSE.txt'
        with open(license_path) as handle:
            expected_lines = handle.readlines()
        actual_lines = seq.open(license_path).to_list()
        self.assertListEqual(expected_lines, actual_lines)

        # With a delimiter, the expected result is the whole text split on it.
        expected_chunks = ''.join(expected_lines).split(',')
        actual_chunks = seq.open(license_path, delimiter=',').to_list()
        self.assertListEqual(expected_chunks, actual_chunks)

        # Write mode must be rejected.
        with self.assertRaises(ValueError):
            seq.open(license_path, mode='w').to_list()
Exemple #2
0
def plot(filename):
    """Plot and regress the runtime measurements stored in `filename`.

    Every line of the file is parsed with ``parse_line``. Rows whose
    ``algorithm`` is ``'bfs'`` are drawn as an Edmonds-Karp scatter plot of
    ``vertexes * edges * edges`` against ``runtime``. Rows whose
    ``algorithm`` is ``'dfs'`` are drawn as a Ford-Fulkerson scatter plot of
    ``flow * edges`` against ``runtime``, one scatter call per distinct flow.
    The linear-regression result of each data set is printed.
    """
    records = seq.open(filename).map(parse_line)
    bfs_rows = records.filter(_.algorithm == 'bfs')
    dfs_rows = records.filter(_.algorithm == 'dfs')

    # --- Edmonds-Karp (BFS) figure ---
    ek_sizes = np.array(
        bfs_rows.map(lambda row: row.vertexes * row.edges * row.edges).list()
    )
    ek_times = np.array(bfs_rows.map(_.runtime).list())
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        ek_sizes, ek_times
    )
    print(slope, intercept, r_value, p_value, std_err)
    plt.title('Numerical Performance of Edmonds-Karp')
    plt.xlabel('Input Size in VE^2')
    plt.ylabel('Running Time in Seconds')
    plt.scatter(ek_sizes, ek_times)
    plt.show()
    plt.clf()

    # --- Ford-Fulkerson (DFS) figure ---
    # Tuples are (flow, flow * edges, runtime), grouped by flow.
    grouped = (
        dfs_rows
        .map(lambda row: (row.flow, row.flow * row.edges, row.runtime))
        .group_by(_[0])
        .cache()
    )
    plt.title('Numerical Performance of Ford-Fulkerson')
    plt.xlabel('Input Size in Ef')
    plt.ylabel('Running Time in Seconds')
    max_flow = grouped.max_by(lambda kv: kv[0])[0]
    all_sizes = []
    all_times = []
    for flow, points in grouped:
        sizes = [point[1] for point in points]
        all_sizes.extend(sizes)
        times = [point[2] for point in points]
        all_times.extend(times)
        # The colour is given as a numeric string and is capped at 0.8.
        shade = min(1 - flow / max_flow, .8)
        plt.scatter(sizes, times, color=str(shade))

    ff_sizes = np.array(all_sizes)
    ff_times = np.array(all_times)
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        ff_sizes, ff_times
    )
    print(slope, intercept, r_value, p_value, std_err)
    plt.show()
Exemple #3
0
 def test_seq_open(self):
     """``seq.open`` on a temp file compares equal to the expected lines."""
     tmp_file = _make_tmp_file('''
         red
         green
         blue
     ''')
     expected = ['red\n', 'green\n', 'blue\n', '\n']
     opened = seq.open(tmp_file)
     assert opened == expected
Exemple #4
0
def load_meta(meta_file):
    """Open `meta_file` with ``seq.open`` and map every line to a ``Meta``.

    Each line is split on whitespace. The first three tokens are converted
    to ``int`` (question, sentence, token); the remaining tokens are joined
    back together with single spaces to form the guess.
    """
    def _to_meta(raw_line):
        fields = raw_line.split()
        question, sentence, token = [int(fields[i]) for i in range(3)]
        guess = ' '.join(fields[3:])
        return Meta(question, sentence, token, guess)

    lines = seq.open(meta_file)
    return lines.map(_to_meta)
Exemple #5
0
def load_predictions(pred_file):
    """Open `pred_file` with ``seq.open`` and map lines to ``Prediction``.

    The first whitespace-separated token of a line is the score (``float``).
    When a second token exists it is split on ``_`` into three integers
    (question, sentence, token); otherwise those three fields are ``None``.
    A line that fails to parse is printed and the exception is re-raised.
    """
    def _to_prediction(raw_line):
        try:
            fields = raw_line.split()
            score = float(fields[0])
            if len(fields) >= 2:
                question, sentence, token = [
                    int(part) for part in fields[1].split('_')
                ]
            else:
                question = sentence = token = None
            return Prediction(score, question, sentence, token)
        except Exception:
            # Report the offending line, then let the error propagate.
            print("Error parsing line: {0}".format(raw_line))
            raise

    lines = seq.open(pred_file)
    return lines.map(_to_prediction)
Exemple #6
0
def load_predictions(pred_file: str) -> Sequence:
    """Open `pred_file` with ``seq.open`` and map lines to ``Prediction``.

    The first whitespace-separated token of a line is the score (``float``).
    When a second token exists it is split on ``_`` into three integers
    (question, sentence, token); otherwise those three fields are ``None``.
    A line that fails to parse is logged at info level and the exception
    is re-raised.
    """
    def _to_prediction(raw_line: str) -> Prediction:
        try:
            fields = raw_line.split()
            score = float(fields[0])
            # Default for lines that carry only a score.
            question = sentence = token = None
            if len(fields) >= 2:
                question, sentence, token = [
                    int(part) for part in fields[1].split('_')
                ]
            return Prediction(score, question, sentence, token)
        except Exception:
            # Record the offending line, then let the error propagate.
            log.info("Error parsing line: {0}".format(raw_line))
            raise

    lines = seq.open(pred_file)
    return lines.map(_to_prediction)
Exemple #7
0
def plot(filename):
    """Show two scatter plots built from the measurements in `filename`.

    Lines are parsed with ``parse_line`` and split by their ``algorithm``
    attribute. ``'bfs'`` rows give the Edmonds-Karp plot
    (``vertexes * edges * edges`` against ``runtime``). ``'dfs'`` rows give
    the Ford-Fulkerson plot (``flow * edges`` against ``runtime``), drawn
    one flow group at a time. The ``stats.linregress`` result for each
    data set is printed.
    """
    parsed = seq.open(filename).map(parse_line)
    bfs_runs = parsed.filter(_.algorithm == 'bfs')
    dfs_runs = parsed.filter(_.algorithm == 'dfs')

    # First figure: BFS rows.
    bfs_input = bfs_runs.map(lambda run: run.vertexes * run.edges * run.edges)
    bfs_x = np.array(bfs_input.list())
    bfs_y = np.array(bfs_runs.map(_.runtime).list())
    slope, intercept, r_value, p_value, std_err = stats.linregress(bfs_x, bfs_y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.title('Numerical Performance of Edmonds-Karp')
    plt.xlabel('Input Size in VE^2')
    plt.ylabel('Running Time in Seconds')
    plt.scatter(bfs_x, bfs_y)
    plt.show()
    plt.clf()

    # Second figure: DFS rows as (flow, flow * edges, runtime), keyed by flow.
    dfs_triples = dfs_runs.map(
        lambda run: (run.flow, run.flow * run.edges, run.runtime)
    )
    by_flow = dfs_triples.group_by(_[0]).cache()
    plt.title('Numerical Performance of Ford-Fulkerson')
    plt.xlabel('Input Size in Ef')
    plt.ylabel('Running Time in Seconds')
    max_flow = by_flow.max_by(lambda kv: kv[0])[0]
    dfs_x, dfs_y = [], []
    for flow, triples in by_flow:
        group_x = [triple[1] for triple in triples]
        dfs_x.extend(group_x)
        group_y = [triple[2] for triple in triples]
        dfs_y.extend(group_y)
        # The colour value is passed as a string and never exceeds 0.8.
        grey = min(1 - flow / max_flow, .8)
        plt.scatter(group_x, group_y, color=str(grey))

    slope, intercept, r_value, p_value, std_err = stats.linregress(
        np.array(dfs_x), np.array(dfs_y)
    )
    print(slope, intercept, r_value, p_value, std_err)
    plt.show()
Exemple #8
0
 def logfile2blocks(self, path):
     # type: (Path) -> Iterable[str]
     """Open the file at `path` with ``seq.open`` using UTF-8 encoding."""
     posix_path = path.as_posix()
     return seq.open(posix_path, encoding='utf8')
def _read_file(path):
    """Return the stripped, non-empty lines of `path` joined by newlines."""
    stripped = seq.open(path, delimiter='\n').map(lambda line: line.strip())
    non_empty = stripped.filter(lambda line: line != '')
    return non_empty.make_string('\n')
 def test_open_gzip(self):
     """``seq.open`` on test.csv.gz must equal test.csv read in binary mode."""
     with open("functional/test/data/test.csv", "rb") as plain_file:
         expected = plain_file.readlines()
     actual = seq.open('functional/test/data/test.csv.gz').to_list()
     self.assertListEqual(expected, actual)