Ejemplo n.º 1
0
def set_value(partition_key, sort_key):
    """Forward a write to the owning server and index the key locally.

    The request body is posted to the server selected by
    ``hashes["hashes"].get_machine(partition_key)``.  The first time a
    ``partition_key:sort_key`` pair is seen it is also recorded in the local
    lookup structures (``indexed``, ``sort_index``, ``between_index``,
    ``partition_trees``).  The returned response carries the upstream status
    code both as its body and as its HTTP status.
    """
    lookup_key = partition_key + ":" + sort_key
    owner = servers[hashes["hashes"].get_machine(partition_key)]
    target_url = "http://{}/set/{}/{}".format(owner, partition_key, sort_key)
    response = requests.post(target_url, data=request.data)

    # Only keys that have not been seen before need to be indexed.
    if lookup_key not in indexed:
        indexed[lookup_key] = True

        sort_index[lookup_key] = sort_key
        if sort_key not in sort_index:
            sort_index[sort_key] = pygtrie.CharTrie()
        sort_index[sort_key][partition_key] = lookup_key

        if partition_key not in between_index:
            between_index[partition_key] = Tree("", None, None)
        if partition_key not in partition_trees:
            partition_trees[partition_key] = both_between_index.insert(
                partition_key, Tree("", None, None))

        between_index[partition_key].insert(sort_key, partition_key,
                                            lookup_key)
        partition_trees[partition_key].partition_tree.insert(
            sort_key, partition_key, lookup_key)

    status = response.status_code
    return make_response(str(status), status)
Ejemplo n.º 2
0
def set_value(partition_key, sort_key):
    """Store the request body under ``partition_key:sort_key`` and index it.

    The decoded request body is always written to ``data``.  The key is added
    to the lookup structures (``indexed``, ``sort_index``, ``between_index``,
    ``partition_trees``, ``sql_index``) only the first time it is seen.

    Returns an empty response with status 202 in both cases.
    """
    lookup_key = partition_key + ":" + sort_key
    print("{} Saving {} to {}".format(self_server, request.data, lookup_key))
    data[lookup_key] = request.data.decode('utf-8')
    if lookup_key in indexed:
        # The value was overwritten above; the key is already indexed.
        # (Previously this path read `response`, which is never assigned in
        # this function.)
        return make_response('', 202)

    indexed[lookup_key] = True
    sort_index[lookup_key] = sort_key
    if sort_key not in sort_index:
        sort_index[sort_key] = pygtrie.CharTrie()
    sort_index[sort_key][partition_key] = lookup_key
    if partition_key not in between_index:
        between_index[partition_key] = Tree("", None, None)
    if partition_key not in partition_trees:
        partition_tree = both_between_index.insert(partition_key,
                                                   Tree("", None, None))
        partition_trees[partition_key] = partition_tree
    between_index[partition_key].insert(sort_key, partition_key, lookup_key)
    partition_trees[partition_key].partition_tree.insert(
        sort_key, partition_key, lookup_key)

    sql_index[sort_key] = lookup_key

    return make_response('', 202)
Ejemplo n.º 3
0
 def test_add(self):
     # The first add() creates the root; the next two are expected to become
     # its left and then its right child.
     tree = Tree()
     root = tree.add()

     second = tree.add()
     self.assertEqual(second, root.left)
     self.assertEqual(tree.size(), 2)
     self.assertEqual(tree.max_depth(), 2)

     third = tree.add()
     self.assertEqual(third, root.right)
     self.assertEqual(tree.size(), 3)
     self.assertEqual(tree.max_depth(), 2)
Ejemplo n.º 4
0
 def test_root(self):
     # A tree with a single added node: that node is the root, both
     # traversals visit only it, and it has no children.
     tree = Tree()
     only_node = tree.add()

     self.assertEqual(only_node, tree.root)
     self.assertEqual(tree.size(), 1)
     self.assertEqual(tree.max_depth(), 1)

     default_order = [entry.node for entry in tree]
     self.assertEqual(default_order, [only_node])
     sorted_order = [entry.node for entry in tree.in_order()]
     self.assertEqual(sorted_order, [only_node])

     self.assertFalse(only_node.left)
     self.assertFalse(only_node.right)
Ejemplo n.º 5
0
 def test_insert(self):
     # Insert values one at a time and check size, in-order rendering and
     # the BST invariant after every step.
     tree = Tree()
     self.assertTrue(tree.is_bst())

     steps = (
         (5, '5'),
         (2, '2 5'),
         (3, '2 3 5'),
         (8, '2 3 5 8'),
     )
     for expected_size, (value, rendered) in enumerate(steps, start=1):
         tree.insert(value)
         self.assertEqual(tree.size(), expected_size)
         self.assertEqual(self.values(tree), rendered)
         self.assertTrue(tree.is_bst())

     # The final insert additionally checks reverse order and balance.
     tree.insert(-1)
     self.assertEqual(tree.size(), 5)
     self.assertEqual(self.values(tree), '-1 2 3 5 8')
     self.assertEqual(self.reversed_values(tree), '8 5 3 2 -1')
     self.assertTrue(tree.is_bst())
     balance = tree.balanced_factor()
     self.assertTrue(-1 <= balance <= 1)
Ejemplo n.º 6
0
 def test_empty_tree(self):
     # A freshly constructed tree has no nodes, no depth and iterates empty.
     empty = Tree()
     node_count = empty.size()
     self.assertEqual(node_count, 0)
     depth = empty.max_depth()
     self.assertEqual(depth, 0)
     self.assertEqual(list(empty), [])
Ejemplo n.º 7
0
            _minimal_height(slice_left, btree)


def minimal_height(sorted_array):
    """Build a ``BinaryTree`` from ``sorted_array`` rooted at its middle item.

    The element at ``get_mid_index(sorted_array)`` becomes the root; the
    slices on either side of it are then handed to ``_minimal_height``
    together with the tree.  Returns the tree.
    """
    middle = get_mid_index(sorted_array)
    btree = BinaryTree(TreeNode(value=sorted_array[middle]))

    left_part = sorted_array[:middle]
    _minimal_height(left_part, btree)

    # The right slice starts just past the middle element, except for a
    # single-element input where it starts at the middle itself.
    if len(sorted_array) > 1:
        right_start = middle + 1
    else:
        right_start = middle
    right_part = sorted_array[right_start:]
    _minimal_height(right_part, btree)

    return btree


if __name__ == '__main__':  # tests

    # An in-order walk of the generated tree must reproduce the input.
    sorted_array = [5, 10, 15, 20, 27, 30, 45, 90, 100, 110, 115, 120]
    btree = minimal_height(sorted_array)

    def visit(lista):
        """Return a visitor callback that appends each node to ``lista``."""
        return lambda node: lista.append(node)

    result = []
    Tree.visit_in_order(btree.root, visit(result))
    result = [node.value for node in result]
    assert result == sorted_array
Ejemplo n.º 8
0
    def execute(self):
        """Execute the parsed statement across the servers and yield rows.

        This is a generator function: nothing below runs (including the HTTP
        requests) until the returned generator is iterated.  Exactly one
        branch runs, chosen by the first truthy attribute of ``self.parser``
        in this order: ``create_join_clause``, ``update_table``,
        ``fts_clause``, ``insert_values``, ``group_by``, ``join_clause``,
        ``select_clause``.

        Yields:
            * full-text search: every element of each server's ``/sql`` reply;
            * join: one list of selected values per output record;
            * plain select: ``[server] + row`` for each row a server returns.

            The other branches only perform side effects and yield nothing.
        """
        if self.parser.create_join_clause:
            print("Creating a join")
            print(self.parser.create_join_clause)
            for clause in self.parser.create_join_clause:
                left_table, left_field = clause[0].split(".")
                right_table, right_field = clause[1].split(".")
                print(left_table)
                print(right_table)
                # Register the clause under both tables so an insert into
                # either side can find it.
                if left_table in joins:
                    joins[left_table].append({"clause": clause})
                else:
                    joins[left_table] = [{"clause": clause}]
                if right_table in joins:
                    joins[right_table].append({"clause": clause})
                else:
                    joins[right_table] = [{"clause": clause}]

            print(joins)

            # If you insert into the left table, you also need to insert join
            # targets into the right table:
            #   select right.id from right_table
            #   where left_table.left_field = right_table.right_field
            # and then insert left_field onto all servers that return a
            # right id.

        elif self.parser.update_table:
            # Every server applies the update; the replies are only printed.
            entries = []
            for server in servers:
                subset = json.loads(
                    requests.post("http://{}/sql".format(server),
                                  data=json.dumps(
                                      {"parser": self.parser.__dict__})).text)
                if subset:
                    entries = entries + subset
            print("From data node")
            print(entries)

        elif self.parser.fts_clause:
            for server in servers:
                subset = json.loads(
                    requests.post("http://{}/sql".format(server),
                                  data=json.dumps(
                                      {"parser": self.parser.__dict__})).text)
                yield from subset

        elif self.parser.insert_values:
            insert_table = self.parser.insert_table
            print("Insert statement")
            created = False
            new_insert_count = 1
            for field, value in zip(self.parser.insert_fields,
                                    self.parser.insert_values):
                all_servers = []
                table_size = self.get_table_size(insert_table)
                # The row id is fixed while processing the first field and
                # reused for the remaining fields of the same row.
                if not created:
                    new_insert_count = table_size + 1
                table_counts[insert_table] = new_insert_count
                items = []

                # create full text search index
                if isinstance(value, str):
                    tokens = value.replace(",", "").split(" ")
                    for token in tokens:
                        new_key = "FTS.{}.{}.{}.{}".format(
                            insert_table, field, token, new_insert_count)
                        items.append({
                            "key": new_key,
                            "value": new_insert_count
                        })

                new_key = "R.{}.{}.{}".format(insert_table, new_insert_count,
                                              field)
                items.append({"key": new_key, "value": value})
                new_key = "S.{}.{}.{}.{}".format(insert_table, field, value,
                                                 new_insert_count)
                items.append({"key": new_key, "value": new_insert_count})
                new_key = "C.{}.{}.{}".format(insert_table, field,
                                              new_insert_count)
                items.append({"key": new_key, "value": value})
                if not created:
                    # The id keys are emitted once per row, together with
                    # the first field.
                    new_key = "R.{}.{}.id".format(insert_table,
                                                  new_insert_count)
                    new_id = {"key": new_key, "value": new_insert_count}
                    items.append(new_id)
                    created = True
                    all_servers.append(new_id)

                    new_key = "S.{}.{}.{}.{}".format(insert_table, "id",
                                                     new_insert_count,
                                                     new_insert_count)
                    items.append({"key": new_key, "value": new_insert_count})

                items.sort(key=itemgetter('key'))
                # Items in `all_servers` are written to every server.
                for item in all_servers:
                    for server in servers:
                        partition_key = "{}.{}".format(insert_table,
                                                       new_insert_count)
                        sort_key = item["key"]
                        lookup_key = partition_key + ":" + sort_key
                        response = requests.post("http://{}/set/{}/{}".format(
                            server, partition_key, sort_key),
                                                 data=str(item["value"]))

                # Every item is written to the server that owns the partition.
                for item in items:
                    partition_key = "{}.{}".format(insert_table,
                                                   new_insert_count)
                    sort_key = item["key"]
                    lookup_key = partition_key + ":" + sort_key
                    machine_index = hashes["hashes"].get_machine(partition_key)
                    response = requests.post("http://{}/set/{}/{}".format(
                        servers[machine_index], partition_key, sort_key),
                                             data=str(item["value"]))

                    if lookup_key not in indexed:

                        indexed[lookup_key] = True
                        sort_index[partition_key + ":" + sort_key] = sort_key
                        if sort_key not in sort_index:
                            sort_index[sort_key] = pygtrie.CharTrie()
                        sort_index[sort_key][
                            partition_key] = partition_key + ":" + sort_key
                        if partition_key not in between_index:
                            between_index[partition_key] = Tree("", None, None)
                        if partition_key not in partition_trees:
                            partition_tree = both_between_index.insert(
                                partition_key, Tree("", None, None))
                            partition_trees[partition_key] = partition_tree
                        between_index[partition_key].insert(
                            sort_key, partition_key,
                            partition_key + ":" + sort_key)
                        partition_trees[partition_key].partition_tree.insert(
                            sort_key, partition_key,
                            partition_key + ":" + sort_key)

                # we need to check if any materialized joins
                if insert_table in joins:
                    join_clauses = joins[insert_table]
                    print(join_clauses)
                    for join_clause in join_clauses:
                        clauses = join_clause["clause"]
                        left_components = clauses[0].split(".")
                        left_table = left_components[0]
                        left_field = left_components[1]

                        right_components = clauses[1].split(".")
                        right_table = right_components[0]
                        right_field = right_components[1]

                        # Normalise so the inserted table is on the left.
                        if right_table == insert_table:
                            print("We need to swap")
                            left_table, right_table = right_table, left_table
                            left_field, right_field = right_field, left_field

                        print("Do we need to join this inserted data?")
                        print(field)
                        print(left_field)
                        search_value = value
                        if left_field == "id":
                            search_value = str(new_insert_count)

                        if field == left_field or left_field == "id":

                            # Do the prejoin
                            parser = Parser()
                            statement = "select {}.id, {}.{} from {} where {}.{} = {}".format(
                                right_table, right_table, right_field,
                                right_table, right_table, right_field,
                                search_value)
                            parser.parse(statement)
                            print(statement)
                            data = SQLExecutor(parser).execute()
                            for match in data:
                                for server in servers:
                                    server_value = match[1]
                                    print(
                                        "Data from {}, we are inserting {} into server {}"
                                        .format(server, server_value,
                                                servers[machine_index]))
                                    print("{} {}".format(
                                        left_table, right_table))
                                    response = requests.post(
                                        "http://{}/set/{}.{}/R.{}.{}.{}".
                                        format(server, right_table,
                                               server_value, right_table,
                                               server_value, right_field),
                                        data=str(search_value))
                                    response = requests.post(
                                        "http://{}/set/{}.{}/R.{}.{}.{}".
                                        format(servers[machine_index],
                                               right_table, server_value,
                                               right_table, server_value,
                                               "id"),
                                        data=server_value)
                                    if server != servers[machine_index]:
                                        # Copy the inserted row's join field
                                        # and id to the other servers too.
                                        response = requests.post(
                                            "http://{}/set/{}.{}/R.{}.{}.{}".
                                            format(server, left_table,
                                                   new_insert_count,
                                                   left_table,
                                                   new_insert_count,
                                                   left_field),
                                            data=str(search_value))
                                        response = requests.post(
                                            "http://{}/set/{}.{}/R.{}.{}.{}".
                                            format(server, left_table,
                                                   new_insert_count,
                                                   left_table,
                                                   new_insert_count, "id"),
                                            data=str(new_insert_count))
                                    # have to create a key on

        elif self.parser.group_by:
            # NOTE(review): `parser`, `items` and `statement` are locals of
            # this method that are only assigned in the insert branch, so
            # this branch raises UnboundLocalError as written.  The intended
            # source of `items` is not visible here - confirm before fixing.
            print("Group by statement")
            group_by_components = parser.group_by.split(".")
            aggregator = defaultdict(list)
            row_specifier = "C.{}.{}".format(group_by_components[0],
                                             group_by_components[1])
            for item in filter(lambda x: x["key"].startswith(row_specifier),
                               items):
                k = item["key"]
                v = item["value"]

                key_components = k.split(".")

                print(key_components[2])
                if (key_components[1] == group_by_components[0]) and (
                        key_components[2] == group_by_components[1]):
                    aggregator[v].append(v)

            print(statement)
            for k, v in aggregator.items():
                output_line = ""
                for item in parser.select_clause:
                    if "count" in item:
                        output_line += str(len(aggregator[k]))
                    else:
                        output_line += str(k) + " "
                print(output_line)

        elif self.parser.join_clause:

            # One randomly chosen server performs the join on its own data.
            server = random.choice(servers)
            records = json.loads(
                requests.post("http://{}/sql".format(server),
                              data=json.dumps({"parser":
                                               self.parser.__dict__})).text)

            print(records)

            # Collect the fields that server could not supply, and the
            # records that need them.
            missing_fields = set()
            missing_records = []
            for record in records:
                if record["missing_fields"]:
                    missing_fields = missing_fields.union(
                        set(record["missing_fields"]))
                    for dataitem in record["outputs"]:
                        missing_records.append(dataitem)
            print("Missing fields:")
            print(missing_fields)
            missing_index = {}

            # Tag each record with a string index so replies from other
            # servers can be matched back to it.
            for index, missing_record in enumerate(missing_records):
                missing_index[str(index)] = missing_record
                missing_record["missing_index"] = str(index)

            def trim_record(join_fields, items):
                """Yield each item reduced to its index, id and join fields."""
                for item in items:
                    data = {
                        "missing_index": item["missing_index"],
                        "id": item["id"]
                    }
                    for join_field in join_fields:
                        data[join_field] = item[join_field]
                    yield data

            join_fields = []
            join_specs = []

            for missing_field in missing_fields:
                for select_clause in self.parser.select_clause:
                    select_table, select_field = select_clause.split(".")
                    if select_field == missing_field:
                        for join_clause in self.parser.join_clause:
                            left_components = join_clause[0].split(".")
                            left_table = left_components[0]
                            left_field = left_components[1]
                            right_components = join_clause[1].split(".")
                            right_table = right_components[0]
                            right_field = right_components[1]

                            id_field = None
                            if select_table == left_table:
                                id_field = "id"
                                join_field = "{}_{}".format(left_table, "id")
                                print("Join field -> {}".format(join_field))

                            elif select_table == right_table:
                                id_field = "id"
                                join_field = "{}_{}".format(right_table, "id")
                                print("Join field -> {}".format(join_field))

                            if not id_field:
                                # This join clause does not involve the
                                # selected table.
                                print(select_table)
                                print(left_table)
                                print(right_table)
                                continue

                            print("select {} from {} inner join {} on {} = {}".
                                  format(missing_field, "network_table",
                                         select_table, id_field, join_field))

                            join_fields.append(join_field)

                            join_specs.append({
                                "id_field": id_field,
                                "join_field": join_field,
                                "missing_field": missing_field,
                                "select_table": select_table
                            })

            # NOTE(review): the filter uses whichever `join_field` was
            # assigned last above, and the name is unbound if none matched.
            valid_matches = list(
                trim_record(join_fields,
                            filter(lambda x: join_field in x,
                                   missing_records)))
            print("Valid matches")
            pprint(valid_matches)

            def getresults(server):
                """Ask one server for the missing fields of ``valid_matches``.

                Returns a one-element list rather than yielding: as a
                generator the request would not start until the result was
                iterated, so it would never run on the pool's worker thread.
                """
                response = json.loads(
                    requests.post("http://{}/networkjoin".format(server),
                                  data=json.dumps({
                                      "parser": self.parser.__dict__,
                                      "join_specs": join_specs,
                                      "records": valid_matches
                                  })).text)

                return [response]

            with ThreadPoolExecutor(max_workers=len(servers)) as executor:
                future = executor.map(getresults, servers)
                print("Doing network join...")
                for server in future:
                    for rowset in server:
                        print("Missing data records")
                        print(len(missing_records))
                        pprint(missing_records)
                        for missing_data in rowset:
                            print("Missing data")
                            print(missing_data)
                            # Check before unpacking so an empty entry is
                            # skipped instead of failing the unpack.
                            if not missing_data:
                                continue
                            missing_field, missing_index_key, found_data = missing_data
                            missing_index[missing_index_key][
                                missing_field] = found_data

            outputs = []
            for item in records:
                for obj in item["outputs"]:
                    outputs.append(obj)
            # Must be a list: keys are appended to it for "select *".
            header = []
            output_lines = []
            have_printed_header = False
            for result in outputs:
                skip = False
                output_line = []
                for field in self.parser.select_clause:

                    if field == "*":
                        for key, value in result.items():
                            if not have_printed_header:
                                header.append(key)
                            output_line.append(value)
                    else:
                        table, field_name = field.split(".")
                        # Records lacking a selected field are dropped.
                        if field_name not in result:
                            skip = True
                        else:
                            output_line.append(result[field_name])
                if skip:
                    continue
                output_lines.append(output_line)
                have_printed_header = True
            print(header)
            yield from output_lines

        elif self.parser.select_clause:
            for server in servers:
                subset = json.loads(
                    requests.post("http://{}/sql".format(server),
                                  data=json.dumps(
                                      {"parser": self.parser.__dict__})).text)
                # Prefix each row with the server it came from.
                for result in subset:
                    item = [server] + result
                    yield item
Ejemplo n.º 9
0
    def execute(self):
        """Execute the parsed statement against local data and yield results.

        This is a generator function, so no branch runs until the returned
        generator is iterated.  ``self.parser`` is indexed by string key and
        exactly one branch runs, chosen by the first truthy entry in this
        order: ``updates``, ``fts_clause``, ``group_by``, ``join_clause``,
        ``select_clause``.

        Yields:
            * full-text search and plain select: one list of values per row;
            * join: a single dict with ``outputs`` (the joined records) and
              ``missing_fields`` (selected fields absent from some record).

            The update and group-by branches yield nothing.
        """
        if self.parser["updates"]:
            table_datas, field_reductions = self.get_tables(
                [["{}.".format(self.parser["update_table"])]])
            for result in self.process_wheres(field_reductions[0][0]):
                for update in self.parser["updates"]:
                    updated_to, new_value = update
                    updated_field = updated_to.split(".")[1]
                    partition_key = "{}.{}".format(self.parser["update_table"],
                                                   result["id"])

                    # Drop the index entries for the old value first.
                    deindex(partition_key, self.parser["update_table"],
                            result["id"], updated_field, result[updated_field])

                    # now update the data
                    items = []
                    insert_table = self.parser["update_table"]
                    field = updated_field

                    if isinstance(new_value, str):
                        tokens = new_value.replace(",", "").split(" ")
                        for token in tokens:
                            new_key = "FTS.{}.{}.{}.{}".format(
                                insert_table, field, token, result["id"])
                            items.append({
                                "key": new_key,
                                "value": result["id"]
                            })

                    new_key = "R.{}.{}.{}".format(insert_table, result["id"],
                                                  field)
                    items.append({"key": new_key, "value": new_value})
                    new_key = "S.{}.{}.{}.{}".format(insert_table, field,
                                                     new_value, result["id"])
                    items.append({"key": new_key, "value": result["id"]})
                    new_key = "C.{}.{}.{}".format(insert_table, field,
                                                  result["id"])
                    items.append({"key": new_key, "value": new_value})

                    items.sort(key=itemgetter('key'))

                    # Store and index every regenerated key.
                    for item in items:
                        sort_key = item["key"]
                        lookup_key = partition_key + ":" + sort_key
                        data[lookup_key] = item["value"]

                        indexed[lookup_key] = True
                        sort_index[partition_key + ":" + sort_key] = sort_key
                        if sort_key not in sort_index:
                            sort_index[sort_key] = pygtrie.CharTrie()
                        sort_index[sort_key][
                            partition_key] = partition_key + ":" + sort_key
                        if partition_key not in between_index:
                            between_index[partition_key] = Tree("", None, None)
                        if partition_key not in partition_trees:
                            partition_tree = both_between_index.insert(
                                partition_key, Tree("", None, None))
                            partition_trees[partition_key] = partition_tree
                        between_index[partition_key].insert(
                            sort_key, partition_key,
                            partition_key + ":" + sort_key)
                        partition_trees[partition_key].partition_tree.insert(
                            sort_key, partition_key,
                            partition_key + ":" + sort_key)
                        sql_index[sort_key] = lookup_key

        elif self.parser["fts_clause"]:
            # full text search
            table_datas, field_reductions = self.get_tables(
                [["{}.".format(self.parser["table_name"])]])

            table_datas, field_reductions = self.mark_join_table(
                table_datas, field_reductions, self.parser["table_name"])
            table_datas = self.rewrite_joins(table_datas)

            have_printed_header = False
            header = []
            outputs = []

            for result in self.process_wheres(field_reductions[0][0]):
                output_line = []
                for field in self.parser["select_clause"]:

                    if field == "*":
                        for key, value in result.items():
                            if not have_printed_header:
                                header.append(key)
                            output_line.append(value)
                    else:
                        output_line.append(result[field])
                have_printed_header = True
                outputs.append(output_line)
            print(header)
            print(outputs)
            # Yield every matching row.  Yielding the per-row list instead
            # would emit only the individual values of the last row.
            yield from outputs

        elif self.parser["group_by"]:
            # NOTE(review): `items` is a local of this method that is only
            # assigned in the update branch, so this branch raises
            # UnboundLocalError as written; `parser` and `statement` are not
            # defined in this method either.  Confirm the intended data
            # source before relying on this branch.
            print("Group by statement")
            group_by_components = parser.group_by.split(".")
            aggregator = defaultdict(list)
            row_specifier = "C.{}.{}".format(group_by_components[0],
                                             group_by_components[1])
            for item in filter(lambda x: x["key"].startswith(row_specifier),
                               items):
                k = item["key"]
                v = item["value"]

                key_components = k.split(".")

                print(key_components[2])
                if (key_components[1] == group_by_components[0]) and (
                        key_components[2] == group_by_components[1]):
                    aggregator[v].append(v)

            print(statement)
            for k, v in aggregator.items():
                output_line = ""
                for item in parser.select_clause:
                    if "count" in item:
                        output_line += str(len(aggregator[k]))
                    else:
                        output_line += str(k) + " "
                print(output_line)

        elif self.parser["join_clause"]:
            table_datas, field_reductions = self.get_tables(
                self.parser["join_clause"])

            table_datas, field_reductions = self.mark_join_table(
                table_datas, field_reductions, self.parser["table_name"])
            table_datas = self.rewrite_joins(table_datas)

            previous = list(self.hash_join(0, table_datas))
            print("First join")
            for index, pair in enumerate(table_datas[1:]):
                entries = table_datas[index + 1]
                table_name, collection, field, size = entries[0]
                # An entry marked "previous" is fed the rows produced by the
                # preceding join.
                if collection == "previous":
                    table_datas[index + 1][0] = (table_name, previous, field,
                                                 size)

                previous = list(self.hash_join(index + 1, table_datas))
                print("Second join")

            # NOTE(review): len(records) below requires process_wheres to
            # return a sized collection here - confirm.
            records = self.process_wheres(previous)
            print("records from join" + str(records))
            print(len(records))
            missing_fields = set()
            output_lines = []
            for record in records:
                output_lines.append(record)
                # Note every selected field this record does not carry.
                for clause in self.parser["select_clause"]:
                    table, field = clause.split(".")
                    if field not in record:
                        missing_fields.add(field)
            print(len(output_lines))
            print(len(records))
            yield {
                "outputs": output_lines,
                "missing_fields": list(missing_fields)
            }

        elif self.parser["select_clause"]:
            table_datas, field_reductions = self.get_tables(
                [["{}.".format(self.parser["table_name"])]])
            have_printed_header = False
            header = []
            output_lines = []
            for result in self.process_wheres(field_reductions[0][0]):
                skip = False
                output_line = []
                for field in self.parser["select_clause"]:
                    if field == "*":
                        for key, value in result.items():

                            if not have_printed_header:
                                header.append(key)
                            output_line.append(value)
                    else:
                        table, field_name = field.split(".")
                        # Rows lacking a selected field are dropped.
                        if field_name not in result:
                            skip = True
                        else:
                            output_line.append(result[field_name])
                if skip:
                    continue
                output_lines.append(output_line)
                have_printed_header = True

            yield from output_lines
            print(header)
            print(output_lines)
Ejemplo n.º 10
0
sql_index = pygtrie.CharTrie()

# Fetch the initial key/value set from the coordinating server at start-up
# and rebuild every local lookup structure from it.
response = requests.post("http://{}/bootstrap/{}".format(
    args.server, args.port))
print(response.text)
bootstrapped_keys = json.loads(response.text)
for lookup_key, value in bootstrapped_keys.items():
    data[lookup_key] = value
    indexed[lookup_key] = True
    # Split on the first ":" only, so a sort key that itself contains ":"
    # does not break the two-way unpack.
    partition_key, sort_key = lookup_key.split(":", 1)
    sort_index[lookup_key] = sort_key
    if sort_key not in sort_index:
        sort_index[sort_key] = pygtrie.CharTrie()
    sort_index[sort_key][partition_key] = lookup_key
    if partition_key not in between_index:
        between_index[partition_key] = Tree("", None, None)
    if partition_key not in partition_trees:
        partition_tree = both_between_index.insert(partition_key,
                                                   Tree("", None, None))
        partition_trees[partition_key] = partition_tree
    between_index[partition_key].insert(sort_key, partition_key, lookup_key)
    partition_trees[partition_key].partition_tree.insert(
        sort_key, partition_key, lookup_key)

    sql_index[sort_key] = lookup_key

app = Flask(__name__)


@app.route("/get/<lookup_key>", methods=["POST"])