def test_encode(self):
    """Encode a LinearRegressionFactory instance and print its JSON line.

    Smoke test only: nothing is asserted, so the test fails solely when
    encoding the instance or serializing it with JSONProtocol raises.
    """
    linRegFactory = LinearRegressionFactory(11)
    linReg = linRegFactory.get_instance()
    encoded = linRegFactory.encode(linReg)

    protocol = JSONProtocol()
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement is a SyntaxError on Python 3.
    print(protocol.write(0, encoded))
    def test_encode(self):
        """Encode a LinearRegressionFactory instance and print its JSON line.

        Smoke test only: nothing is asserted, so the test fails solely when
        encoding the instance or serializing it with JSONProtocol raises.
        """
        linRegFactory = LinearRegressionFactory(11)
        linReg = linRegFactory.get_instance()
        encoded = linRegFactory.encode(linReg)

        protocol = JSONProtocol()
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement is a SyntaxError on Python 3.
        print(protocol.write(0, encoded))
Ejemplo n.º 3
0
 def test_encode(self):
     """Check that a PredictionNNFactory instance survives JSON encoding.

     The instance is encoded by its factory and the result is handed to
     JSONProtocol.write (JSON is used as mrjob's internal protocol).
     Nothing is asserted; the test passes when no step raises.
     """
     factory = PredictionNNFactory([3, 2, 1])
     network = factory.get_instance()
     encoded_network = factory.encode(network)
     # Serialize the encoded object under a fixed key.
     JSONProtocol().write("test_decode", encoded_network)
def encode_node(node_id, links=None, score=1):
    """Serialize one node as a newline-terminated JSONProtocol line.

    :param node_id: key passed to ``JSONProtocol.write``.
    :param links: optional object exposing ``.items()``; when truthy, its
        sorted items are stored under ``'links'``.
    :param score: value stored under ``'score'`` (defaults to 1).
    :return: the protocol output for ``(node_id, node)`` plus ``'\\n'``.
    """
    payload = {'links': sorted(links.items())} if links else {}
    payload['score'] = score
    return JSONProtocol().write(node_id, payload) + '\n'
Ejemplo n.º 5
0
    def test_uses_json_format(self):
        """JSONProtocol reads and writes a tab-separated JSON key/value line."""
        key = ['a', 1]
        value = {'foo': {'bar': 3}, 'baz': None}
        line = '["a", 1]\t{"foo": {"bar": 3}, "baz": null}'

        # Decoding the line yields the pair; encoding the pair yields the line.
        decoded = JSONProtocol.read(line)
        self.assertEqual((key, value), decoded)
        encoded = JSONProtocol.write(key, value)
        self.assertEqual(line, encoded)
Ejemplo n.º 6
0
    def test_uses_json_format(self):
        """Verify both directions of JSONProtocol against one literal line."""
        pair = (['a', 1], {'foo': {'bar': 3}, 'baz': None})
        expected_line = '["a", 1]\t{"foo": {"bar": 3}, "baz": null}'

        # read: serialized line -> (key, value)
        self.assertEqual(pair, JSONProtocol.read(expected_line))
        # write: (key, value) -> serialized line
        self.assertEqual(expected_line, JSONProtocol.write(pair[0], pair[1]))
Ejemplo n.º 7
0
 def test_numerical_keys_become_strs(self):
     """Numeric dict keys come back as strings after a JSON round trip."""
     serialized = JSONProtocol.write({1: 2}, {3: 4})
     round_tripped = JSONProtocol.read(serialized)
     self.assertEqual(({'1': 2}, {'3': 4}), round_tripped)
def encode_node(node_id, links=None, score=1):
    """Build a node record and return it as one JSONProtocol line.

    The record always carries ``'score'``; ``'links'`` (the sorted items of
    *links*) is included only when *links* is truthy. A trailing ``'\\n'``
    is appended to the serialized output.
    """
    entries = [('links', sorted(links.items()))] if links else []
    entries.append(('score', score))
    writer = JSONProtocol()
    return writer.write(node_id, dict(entries)) + '\n'
 def test_decode(self):
     """Round-trip a factory instance through JSONProtocol and decode it.

     The value half of the deserialized pair is passed to the factory's
     decode(), which must return a list whose first item is a
     LinearRegression.
     """
     factory = LinearRegressionFactory(11)
     encoded_model = factory.encode(factory.get_instance())

     json_protocol = JSONProtocol()
     wire_line = json_protocol.write(0, encoded_model)
     decoded_pair = json_protocol.read(wire_line)

     # decode() takes a list of encoded objects; index 1 is the value.
     models = factory.decode([decoded_pair[1]])
     assert type(models) == list, "decoded not as a list"
     assert type(models[0]) == LinearRegression, "decoded not as LinearRegression"
    def test_decode(self):
        """Encode, serialize, deserialize and decode a factory instance.

        Passes when decode() returns a list whose first element has exactly
        the type LinearRegression.
        """
        factory = LinearRegressionFactory(11)
        instance = factory.get_instance()
        encoded_instance = factory.encode(instance)

        codec = JSONProtocol()
        restored = codec.read(codec.write(0, encoded_instance))

        # restored is the (key, value) pair; only the value is decoded.
        decoded = factory.decode([restored[1]])
        assert type(decoded) == list, "decoded not as a list"
        first_type = type(decoded[0])
        assert first_type == LinearRegression, "decoded not as LinearRegression"
Ejemplo n.º 11
0
def encode_node(node_id, links=None, score=1):
    """Return a node as a newline-terminated line in JSON format.

    :param node_id: unique ID for this node (any type is okay).
    :param links: optional mapping-like object (``.items()`` is called on
        it) from the ID of a node to send score to, to a weight between 0
        and 1. Weights should sum to 1 for each node; if they sum to less
        than 1, the algorithm will still converge. Omitted from the output
        only when ``None``.
    :param score: initial score for the node. Defaults to 1. Ideally the
        average score of your nodes should be 1 (but if it isn't, the
        algorithm will still converge).
    :type score: float
    :return: ``JSONProtocol.write(node_id, node)`` followed by ``'\\n'``.
    """
    node = {} if links is None else {'links': sorted(links.items())}
    node['score'] = score
    return JSONProtocol.write(node_id, node) + '\n'
Ejemplo n.º 12
0
def encode_node(node_id, links=None, score=1):
    """Serialize a node record with JSONProtocol and append a newline.

    :param node_id: unique ID for this node (any type is okay).
    :param links: ``None``, or an object whose ``.items()`` yields
        ``(node_id, weight)`` pairs; *node_id* is the node to send score
        to and *weight* is a number between 0 and 1. Weights should sum to
        1 per node, but the algorithm still converges if they sum to less.
    :param score: initial score for the node, 1 by default. The average
        score across nodes should ideally be 1, though convergence does
        not depend on it.
    :type score: float
    """
    has_links = links is not None
    node = {}
    if has_links:
        # Sorted so the serialized link order does not depend on the input.
        node.update(links=sorted(links.items()))
    node.update(score=score)
    return JSONProtocol.write(node_id, node) + '\n'
Ejemplo n.º 13
0
 def test_decode(self):
     '''
     Test whether an algorithm that was json encoded (json is used as the
     mrjob internal protocol) can be decoded again by its factory.
     '''
     layerSizes = [3, 2, 1]
     nnFactory = PredictionNNFactory(layerSizes)
     nn = nnFactory.get_instance()
     # encode
     obj_encoded = nnFactory.encode(nn)
     # round-trip through the json protocol
     protocol = JSONProtocol()
     json_encoded = protocol.write("test_decode", obj_encoded)
     obj_encoded = protocol.read(json_encoded)

     # read() returns the (key, value) pair; only the value is decoded
     nnArr = nnFactory.decode([obj_encoded[1]])
     assert type(nnArr) == list, "decoded not as a list"
     # The message now names the type that is actually checked.
     assert type(nnArr[0]) == MultilayerPerceptron, "decoded not as MultilayerPerceptron"
Ejemplo n.º 14
0
 def test_numerical_keys_become_strs(self):
     """JSON turns numeric dict keys into strings on a write/read cycle."""
     expected = ({'1': 2}, {'3': 4})
     line = JSONProtocol.write({1: 2}, {3: 4})
     self.assertEqual(expected, JSONProtocol.read(line))
Ejemplo n.º 15
0
 def test_tuples_become_lists(self):
     """Tuples written through JSONProtocol are read back as lists."""
     serialized = JSONProtocol.write((1, 2), (3, 4))
     self.assertEqual(([1, 2], [3, 4]), JSONProtocol.read(serialized))
Ejemplo n.º 16
0
 def test_tuples_become_lists(self):
     """A tuple key and tuple value both decode as lists after encoding."""
     key_in, value_in = (1, 2), (3, 4)
     decoded = JSONProtocol.read(JSONProtocol.write(key_in, value_in))
     self.assertEqual(([1, 2], [3, 4]), decoded)
Ejemplo n.º 17
0
    # NOTE(review): NUMBER_RE is not referenced in the visible code; it is
    # kept in case code outside this view relies on it.
    NUMBER_RE = re.compile(r"[-?\d']+")
    input_file = 'sample_input.txt'

    # Each input line is split on whitespace: the first token is a node ID
    # and the optional second token is one neighbour of that node.
    with open(input_file, 'r') as in_file:
        rows = [row.split() for row in in_file.read().splitlines()]
    # Blank lines split to [] and would raise IndexError below, so drop them.
    data = [tokens for tokens in rows if tokens]

    # First pass: register every node that appears in the first column.
    # NOTE(review): IDs that appear only as a neighbour never become keys;
    # confirm this matches the expected input format.
    nodes = {}
    for line in data:
        nodes[int(line[0])] = []  # serialized as an empty JSON list

    # Second pass: collect neighbours. A line holding only a node ID marks
    # a dangling node and resets its adjacency list to empty.
    for line in data:
        if len(line) == 1:
            nodes[int(line[0])] = []
        else:
            nodes[int(line[0])].append(int(line[1]))

    unique_node_count = len(nodes)
    # 1.0 forces true division on Python 2 as well; raises
    # ZeroDivisionError when the input holds no nodes.
    initial_pagerank = 1.0 / unique_node_count

    j = JSONProtocol()
    newline = '\n'.encode('utf-8')

    # The file is opened in binary mode, so j.write() is presumably expected
    # to return bytes - TODO confirm against the mrjob version in use.
    with open("preprocessed_" + input_file, "wb+") as out_file:
        for _id, adj in nodes.items():
            out_file.write(j.write(_id, (adj, initial_pagerank)))
            out_file.write(newline)
Ejemplo n.º 18
0
    inF = open(inputFileName, 'r')
    myP4.stdin = inF.readlines()
    inF.close()

    # Variables for keeping track of convergence
    converged = False  # keeps track of convergence
    ctrValue = 0  # value of the counter (number of better paths found)
    iteration = 0  # number of iterations
    # Loops mrP4 until we have convergence
    while not converged:
        with myP4.make_runner() as runner:  # make a runner
            runner.run()  # run job
            nextIn = open(outputFileName, 'w')
            for line in runner.stream_output():
                key, value = myP4.parse_output_line(line)
                nextIn.write(JSONProtocol.write(key, value) + '\n')
                print '-> Output of MR Job is:', key, value
            nextIn.close()
            iteration += 1  # update number of iterations
            # Get counter
            ctr = runner.counters()
            ctrValue = ctr[0]['reducer'][
                'better found']  # Extract counter value
            if (ctrValue == 0):
                converged = True  # We have convergence
        # Get previous run's values
        with open(outputFileName, 'r') as nextIn:
            myP4.stdin = nextIn.readlines()

    # Output file reorganization
    s = []