Beispiel #1
0
def main():
    """Count the distinct words used by each character.

    Reads ``./southpark/All-seasons.csv`` (columns ``Character`` and
    ``Line``), runs a mincemeat job that emits one ``(character, word)``
    pair per word and reduces each character's words to the number of
    distinct ones, then writes one row per character to ``./task1res.csv``.
    """
    data = {}
    keys = gennum()
    with open("./southpark/All-seasons.csv", "r") as src:
        for row in csv.DictReader(src, delimiter=","):
            # Lower-case the line and split on runs of non-word characters;
            # the split can leave empty strings at the edges, so drop them.
            words = [w for w in re.split(r"\W+", row["Line"].lower()) if w != ""]
            # The next() builtin works on Python 2.6+ and Python 3, unlike
            # the Python-2-only .next() method.
            data[next(keys)] = (row["Character"], words)

    def mapfn(k, v):
        # v is the (character, words) pair stored above.
        for w in v[1]:
            yield v[0], w

    def reducefn(k, vs):
        # Number of distinct words emitted for this key.
        return len(set(vs))

    s = mincemeat.Server()
    s.datasource = data
    s.mapfn = mapfn
    s.reducefn = reducefn
    print("Server is running...")
    res = s.run_server(password="******")

    # "wb" is the mode the Python 2 csv module expects.
    # NOTE(review): on Python 3, csv.writer needs a text-mode file
    # (mode "w" with newline="") instead.
    with open("./task1res.csv", "wb") as out:
        w = csv.writer(out, delimiter=",")
        w.writerow(["Character", "Number of words"])
        for key in res:
            w.writerow([key, res[key]])
Beispiel #2
0
def run():
    """Repeat a mincemeat job with k = 3 centers until the centers settle.

    Every point read from ``kmeans_input.txt`` (one space-separated list of
    floats per line) is a datasource key whose value is the current list of
    centers.  After each job the first elements of the reduce results become
    the new centers; once they equal the previous ones, every point is
    printed next to the center of its cluster and the loop ends.
    """
    k = 3
    # Random starting centers in the unit square, rounded to two decimals.
    centers = sorted(
        (round(random.uniform(0, 1), 2), round(random.uniform(0, 1), 2))
        for _ in range(k)
    )

    data = {}
    with open("kmeans_input.txt", "r") as f:
        for line in f:
            point = tuple(float(a) for a in line.split(" "))
            data[point] = centers

    while True:
        server = mincemeat.Server()
        server.mapfn = map_kmeans
        server.reducefn = reduce_kmeans
        server.datasource = data
        res = server.run_server(password="******")

        new_centers = sorted(a[0] for a in res.values())
        if new_centers == centers:
            # Converged: each result is indexed as (center, points).
            for cluster in res.values():
                center = cluster[0]
                for p in cluster[1]:
                    print("(%s %s)\t(%s %s)" % (p[0], p[1], center[0], center[1]))
            break

        # Not converged yet: hand the new centers to every point.
        centers = new_centers
        for p in list(data):
            data[p] = centers
Beispiel #3
0
def server(credentials):
    """
    Run a Map-Reduce Server, process a single Map-Reduce task, and print
    the results.

    ``credentials`` is passed as keyword arguments to
    ``mincemeat.Server.run_server``.  Each printed line pairs one result
    taken in key order (value, key) with one result taken in value order
    (value, key).
    """
    s = mincemeat.Server()
    s.datasource = datasource
    s.mapfn = mapfn
    s.collectfn = collectfn
    s.reducefn = reducefn
    s.finishfn = finishfn

    results = s.run_server(**credentials)

    # Map-Reduce over 'datasource' complete.  Enumerate results,
    # ordered both lexicographically and by count
    bycount = {}
    for k, v in results.items():
        bycount.setdefault(v, []).append(k)

    # Flatten to (value, key) pairs: ascending by value, keys sorted
    # within each value.
    bycountlist = [
        (count, key)
        for count, keys in sorted(bycount.items())
        for key in sorted(keys)
    ]

    for k, lt in zip(sorted(results.keys()), bycountlist):
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%8d %-40.40s %8d %s" % (results[k], k, lt[0], lt[1]))
Beispiel #4
0
def test_bind():
    """
    Tests that socket binding exclusion works.

    Starts a Server_daemon on a unique port, then checks that a second
    Server connecting with the same credentials raises an
    "address already in use" style error.
    """
    global testcount
    testcount += 1

    port = unique_port(mincemeat.DEFAULT_PORT)
    cred = credentials.copy()
    cred.update({"port": port})

    s1 = mincemeat.Server_daemon(credentials=cred, timeout=5.)
    state = s1.state()
    assert state == "idle"
    s1.start()
    time.sleep(1)
    state = s1.state()
    assert state == "authenticated"

    try:
        s2 = mincemeat.Server()
        s2.conn(**cred)
    except Exception as e:
        # Windows and POSIX word the "port already bound" error differently.
        assert "Only one usage of each socket address" in str(e) \
            or "Address already in use" in str(e)
    else:
        # Kept outside the try body so this failure is not swallowed by the
        # handler above and the message reaches the test report.
        assert False, "Should have thrown Exception in bind()!"
Beispiel #5
0
def test_example():
    """
    Tests a scaled-up version of example.py.

    Starts 1-5 Client threads, and scales up the text corpus a bit, proportional
    to the number of threads we choose.
    """
    global testcount
    testcount += 1

    port = unique_port(mincemeat.DEFAULT_PORT)

    clients = random.randint(1, 5)
    scale = clients * 73

    # Since we are running multiple asyncore-based Clients and a
    # Server in separate threads, we need to specify map={} for the
    # Clients, so they all don't use the (default) global asyncore
    # socket map as the Server...
    logging.info("Starting %d clients...", clients)
    # range() rather than the Python-2-only xrange(); at most 5 iterations.
    for _ in range(clients):
        c = mincemeat.Client(map={})
        # Each client connects one second from now, from a timer thread.
        t = threading.Timer(1.0,
                            c.conn,
                            args=("", port),
                            kwargs={"password": "******"})
        t.daemon = True
        t.start()

    s = mincemeat.Server(map={})
    s.datasource = dict(enumerate(data * scale))
    s.mapfn = mapfn
    s.reducefn = reducefn

    now = mincemeat.timer()
    results = s.run_server(password="******", port=port)
    # The corpus is repeated `scale` times, so every per-word count scales
    # by the same factor.  .items() works on Python 2 and 3, unlike
    # .iteritems().
    expected = dict(
        (k, v * scale) for k, v in {
            'All': 1,
            "Couldn't": 1,
            'Dumpty': 2,
            'Humpty': 3,
            "King's": 2,
            'a': 2,
            'again': 1,
            'all': 1,
            'and': 1,
            'fall': 1,
            'great': 1,
            'had': 1,
            'horses': 1,
            'men': 1,
            'on': 1,
            'put': 1,
            'sat': 1,
            'the': 2,
            'together': 1,
            'wall': 1
        }.items())
    assert results == expected
Beispiel #6
0
    def start(self, display):
        """Run a mincemeat job over ``self.source`` and pass the results to ``display``.

        The server uses this object's ``mapfn`` and ``reducefn``.
        """
        server = mincemeat.Server()
        server.datasource = self.source
        server.mapfn = self.mapfn
        server.reducefn = self.reducefn

        display(server.run_server(password="******"))
def run_server(csv_file):
    """Run a mincemeat job over the rows of ``csv_file`` and return its results.

    The rows are read with ``csv.DictReader`` and keyed by their 0-based
    position in the file.
    """
    rows = {}
    for index, row in enumerate(csv.DictReader(csv_file)):
        rows[index] = row

    server = mm.Server()
    server.datasource = rows
    server.mapfn = mapfn
    server.reducefn = reducefn

    return server.run_server()
Beispiel #8
0
def run_server():
    """Run a mincemeat job over the module-level ``data`` and return its results.

    Progress messages are printed before each step.  The items of ``data``
    are keyed by their 0-based position.
    """
    # A single parenthesised string prints identically on Python 2 and 3.
    print("Starting up")
    s = mincemeat.Server()
    print("Prep data")
    s.datasource = dict(enumerate(data))
    s.mapfn = mapfn
    s.reducefn = reducefn
    print("starting server")
    results = s.run_server(password="******")
    # Hand the results back instead of discarding them.
    return results
Beispiel #9
0
def main():
    """Run the map/reduce job and print the resulting estimate of pi.

    The ``'totals'`` entry of the results is unpacked as ``(inside, total)``
    and the estimate printed is ``4 * inside / total``.
    """
    srv = mincemeat.Server()
    samples = get_data(mapsize=10000000, nummaps=1000)
    log.info('data: %s', samples)
    log.info('waiting for workers...')

    srv.datasource = samples
    srv.mapfn = mapfn
    srv.reducefn = reducefn
    outcome = srv.run_server(password='******')

    inside, total = outcome['totals']
    print(outcome, inside, total)
    summary = '{0}: {1} inside, {2} total, pi ~= {3}'
    print(summary.format('totals', inside, total, 4. * inside / total))
Beispiel #10
0
def run_server(docs, docnames):
    """Run a mincemeat job over a set of documents and return its results.

    ``docs`` and ``docnames`` are paired up with ``zip``: each file in
    ``docs`` is read with ``readall`` and stored under the matching name,
    with all ``string.punctuation`` characters removed.
    """
    # The two-argument translate(None, chars) form is the Python 2 str API:
    # it deletes every character listed in chars.
    rdr = {
        docname: readall(fname).translate(None, string.punctuation)
        for fname, docname in zip(docs, docnames)
    }

    s = mm.Server()
    s.datasource = rdr
    s.mapfn = mapfn
    # NOTE(review): the reduce function is built by eval() from a template
    # formatted with docnames -- docnames must never come from untrusted
    # input.  eval() also sees this function's local names, so renaming
    # locals here could change what the template resolves.
    s.reducefn = eval(reducefn_template % docnames)

    return s.run_server()
Beispiel #11
0
def run_server(mat_file):
    """Run a mincemeat job over the matrix cells in ``mat_file``.

    Each CSV row (columns ``matrix``, ``row``, ``col``, ``value``) becomes
    one datasource entry keyed by ``(is_matrix_a, row, col)`` with the value
    ``(matsize, value)``, where ``matsize`` is the largest row index plus
    one.  Returns the job's results.
    """
    cells = list(csv.DictReader(mat_file))
    matsize = 1 + max(int(cell['row']) for cell in cells)

    server = mm.Server()

    source = {}
    for cell in cells:
        key = (cell['matrix'] == 'a', int(cell['row']), int(cell['col']))
        source[key] = (matsize, int(cell['value']))
    server.datasource = source

    server.mapfn = mapfn
    server.reducefn = reducefn

    return server.run_server()
Beispiel #12
0
def main():
    """Multiply the two matrices stored in ``m1.csv`` with a mincemeat job.

    The three dimensions ``m``, ``k`` and ``n`` are taken from the first
    three command-line arguments.  Each CSV row (columns ``matrix``,
    ``row``, ``col``, ``val``) is one matrix cell.  Every cell of the
    product, reduced modulo 97, is written to ``task3res.csv`` as matrix
    ``c``.

    Raises:
        ValueError: if fewer than three arguments are given or one of them
            is not an integer.
    """
    data = {}
    keys = gennum()

    # Convert the dimensions once instead of once per CSV row.
    m, k, n = (int(arg) for arg in sys.argv[1:4])

    with open("m1.csv", "r") as src:
        for row in csv.DictReader(src, delimiter=","):
            # The dimensions travel with every row because mapfn reads them
            # from its value (presumably since it cannot see this
            # function's locals when run by a worker -- confirm).
            row["m"] = m
            row["k"] = k
            row["n"] = n
            # The next() builtin works on Python 2.6+ and Python 3, unlike
            # the Python-2-only .next() method.
            data[next(keys)] = row

    def mapfn(k, v):
        # Send each cell to every output cell (row, col) it contributes to,
        # tagged with the index it has to be matched on.
        if v["matrix"] == "a":
            for i in range(v["n"]):
                yield (int(v["row"]), i), (int(v["col"]), int(v["val"]))
        else:
            for i in range(v["m"]):
                yield (i, int(v["col"])), (int(v["row"]), int(v["val"]))

    def reducefn(k, vs):
        # Group the incoming values by their matching index, then multiply
        # the first two values of each group and sum the products.
        d = {}
        for v in vs:
            d.setdefault(v[0], []).append(v[1])
        res = 0
        for key in d:
            res += d[key][0] * d[key][1]
        return res % 97

    s = mincemeat.Server()
    s.datasource = data
    s.mapfn = mapfn
    s.reducefn = reducefn
    print("Server is running...")
    res = s.run_server(password="******")

    # "wb" is the mode the Python 2 csv module expects.
    # NOTE(review): on Python 3, csv.writer needs a text-mode file
    # (mode "w" with newline="") instead.
    with open("task3res.csv", "wb") as out:
        w = csv.writer(out, delimiter=",")
        w.writerow(["matrix", "row", "col", "val"])
        for key in res:
            w.writerow(["c", key[0], key[1], res[key]])
Beispiel #13
0
def main():
    """Run the map/reduce job and save its results sorted by value.

    The pairs produced by ``read_all_files(False)`` form the datasource.
    The results are sorted by value, largest first, printed, and written to
    ``sorted.txt`` as tab-separated ``key<TAB>value`` lines.
    """
    server = mincemeat.Server()

    # The data source can be any dictionary-like object
    corpus = dict(read_all_files(False))
    server.datasource = corpus
    server.mapfn = mapfn
    server.reducefn = reducefn

    outcome = server.run_server(password="******")
    ranked = sorted(outcome.items(), key=operator.itemgetter(1), reverse=True)
    print(ranked)
    with open('sorted.txt', 'w') as out:
        lines = ['%s\t%d' % entry for entry in ranked]
        out.write('\n'.join(lines))
Beispiel #14
0
def run():
    """Print "YES" or "NO" for the clique check on ``check_clique_input.txt``.

    Every input line has the form ``key -> item item ...``; the items are
    split on single spaces and stored under the key.  After the job, "YES"
    is printed when all result values are equal and that value is the number
    of result keys minus one; otherwise "NO" is printed.
    """
    data = {}
    with open("check_clique_input.txt", "r") as f:
        for line in f:
            temp = line.split(" -> ")
            data[temp[0].strip()] = temp[1].strip().split(" ")

    server = mincemeat.Server()
    server.mapfn = clique_map
    server.reducefn = clique_reduce
    server.datasource = data
    res = server.run_server(password="******")

    # Materialize the values: a Python 3 dict view cannot be indexed.
    edge_lists_len = list(res.values())
    vertex_num = len(res)
    print("YES" if len(set(edge_lists_len)) == 1
          and edge_lists_len[0] == vertex_num - 1 else "NO")
Beispiel #15
0
def main():
    """Run two chained map/reduce phases over the lines of ``DATA_FILE``.

    Phase one runs over the stripped input lines.  Its truthy result values
    become the input of phase two, and every phase-two entry with a truthy
    value is printed as ``key (value)``.
    """
    with open(DATA_FILE, "r") as f:
        lines = [line.strip() for line in f]

    s = mincemeat.Server()
    s.mapfn = map_func_phase_one
    s.reducefn = reduce_func_phase_one
    s.datasource = dict(enumerate(lines))
    phase_one = s.run_server(password=SERVER_PASSWORD, port=SERVER_PORT)
    # Drop the empty/falsy phase-one results before feeding phase two.
    survivors = [value for value in phase_one.values() if value]

    s.mapfn = map_func_phase_two
    s.reducefn = reduce_func_phase_two
    s.datasource = dict(enumerate(survivors))
    phase_two = s.run_server(password=SERVER_PASSWORD, port=SERVER_PORT)
    for key, num_of_ref in phase_two.items():
        if not num_of_ref:
            continue
        print("%s (%s)" % (key, num_of_ref))
Beispiel #16
0
    def mapreduce_pageranks(self, clients=8):
        """Start ``clients`` worker processes, run the job, and return its results.

        Each worker is a ``mincemeat.Client`` whose ``conn`` runs in its own
        ``Process`` against ``mincemeat.DEFAULT_PORT``.  The server uses this
        object's ``datasource()``, ``mapfn`` and ``reducefn``.
        """
        # NOTE(review): the Process objects are neither kept nor joined
        # here; confirm the workers exit on their own once the job is done.
        logging.info("Starting %d clients...", clients)
        # range() rather than the Python-2-only xrange().
        for _ in range(clients):
            c = mincemeat.Client()
            c.password = "******"
            p = Process(target=c.conn, args=("", mincemeat.DEFAULT_PORT))
            p.start()

        s = mincemeat.Server()
        s.datasource = self.datasource()
        s.mapfn = self.mapfn
        s.reducefn = self.reducefn
        result = s.run_server(password="******")
        return result
Beispiel #17
0
def main():
    """Count, for every word, how often it occurs in each file of ``./sherlock``.

    Each file is lower-cased and split on runs of non-word characters.  The
    job maps every word to ``(file name, 1)`` and reduces to a per-file
    count.  The results are written to ``./task2res.csv`` with a ``Word``
    column followed by one column per file; files that do not contain a word
    get 0.
    """
    data = {}

    directory = "./sherlock"
    allbooks = os.listdir(directory)

    for book in allbooks:
        name = os.path.join(directory, book)
        with open(name, "r") as src:
            # The split can leave empty strings at the edges; drop them.
            data[book] = [
                w for w in re.split(r"\W+", src.read().lower()) if w != ""
            ]

    def mapfn(k, v):
        # k is the file name, v its list of words.
        for w in v:
            yield w, (k, 1)

    def reducefn(k, vs):
        # Sum the counts per file name for this word.
        res = {}
        for a, i in vs:
            res[a] = res.get(a, 0) + i
        return res

    s = mincemeat.Server()
    s.datasource = data
    s.mapfn = mapfn
    s.reducefn = reducefn
    print("Server is running...")
    res = s.run_server(password="******")

    # "wb" is the mode the Python 2 csv module expects.
    # NOTE(review): on Python 3, csv.DictWriter needs a text-mode file
    # (mode "w" with newline="") instead.
    with open("./task2res.csv", "wb") as out:
        w = csv.DictWriter(out,
                           fieldnames=["Word"] + allbooks,
                           delimiter=",",
                           restval=0)
        w.writeheader()
        for key in res:
            # Add the word itself so the row fills the "Word" column.
            res[key]["Word"] = key
            w.writerow(res[key])
def server(credentials, asynchronous=False, map=None):
    """
    Create a Map-Reduce Server for a single Map-Reduce task and connect it.

    The Server is configured from the module-level datasource and
    map/collect/reduce/finish functions, then connected using
    `credentials` as keyword arguments.  An exception is raised if the
    Server cannot be created and run, or the task does not complete
    successfully.  With asynchronous=True processing is not started here:
    call process() on the returned Server, and results() once processing
    is done.
    """
    srv = mincemeat.Server(map=map)

    srv.datasource = datasource
    srv.mapfn = mapfn
    srv.collectfn = collectfn
    srv.reducefn = reducefn
    srv.finishfn = finishfn

    srv.conn(asynchronous=asynchronous, **credentials)
    return srv
Beispiel #19
0
def main():
    """Repeat the map/reduce job until two consecutive runs give equal results.

    Before every run the data points and the current centroids are plotted.
    Each run receives one ``(centroids, point)`` pair per data point, and
    the values of its results become the centroids for the next run.
    """
    # NOTE(review): CENTROIDS_FILE is opened for writing (which truncates
    # it) but nothing is written through the handle in this function --
    # confirm this is intended.
    with open(CENTROIDS_FILE, "wb") as f:
        centroids = generate_centroids(K)
    with open(DATA_FILE, "rb") as f:
        data = [map(float, row.strip().split(',')) for row in f]

    s = mincemeat.Server()
    s.mapfn = map_func
    s.reducefn = reduce_func

    previous = list()
    results = list()
    # `previous` is empty on the first two checks, so at least two runs
    # happen before results are compared.
    while not previous or previous != results:
        plt.scatter(*np.array(data).T)
        plt.scatter(*np.array(centroids).T, color="g", s=500)
        plt.show()

        previous = results
        pairs = [(centroids, point) for point in data]
        s.datasource = dict(enumerate(pairs))
        results = s.run_server(password=SERVER_PASSWORD, port=SERVER_PORT)
        centroids = results.values()
        print(results)
Beispiel #20
0
def run():
    """Run two chained map/reduce jobs over ``pseudo_synonyms_input.txt``.

    The stripped input lines, keyed by line number, feed the first job; its
    results feed the second.  Every second-job entry whose value is greater
    than 1 is printed as ``first - second (value)``, where ``first`` and
    ``second`` are the two elements of the entry's key.
    """
    with open("pseudo_synonyms_input.txt", "r") as f:
        data = dict(enumerate(line.strip() for line in f))

    server = mincemeat.Server()
    server.mapfn = ps1_map
    server.reducefn = ps1_reduce
    server.datasource = data
    res1 = server.run_server(password="******")

    server.mapfn = ps2_map
    server.reducefn = ps2_reduce
    server.datasource = res1
    res2 = server.run_server(password="******")

    # .items() works on Python 2 and 3, unlike the Python-2-only .iteritems().
    for key, val in res2.items():
        if val > 1:
            print("%s - %s (%s)" % (key[0], key[1], val))
Beispiel #21
0
    for index, item in enumerate(v):
        columns = item.split(':')
        if columns[0] == 'Vendas':
            total += int(columns[1])
        if columns[0] == 'Filial':
            NomeFilial = columns[1]
    L = list()
    L.append(NomeFilial + ' , ' + str(total))
    return L


# Turn every file into a key/value structure (file_name -> file contents).
source = dict(
    (file_name, file_contents(file_name)) for file_name in text_files)

s = mincemeat.Server()

s.datasource = source
s.mapfn = mapfn
s.reducefn = reducefn

results = s.run_server(password="******")

# Write the result to a CSV file.
# NOTE(review): the file opened here is never explicitly closed in the
# visible code.
w = csv.writer(open(PATH + 'result.csv', 'w'))

for k, v in results.items():
    w.writerow([
        k,
        str(v).replace('[', '').replace(']', '').replace("'",
                                                         '').replace(' ', '')