Example #1
0
    def put(self, local, remote, use_sudo=False):
        if self.conn.command_cwds and not remote.startswith(("/", "~")):
            remote = self.conn.command_cwds[-1].rstrip("/'") + "/" + remote

        if use_sudo:
            filename = "/tmp/" + Random.hex(20)
            self.conn.put(File(local).abspath, filename)
            self.sudo("cp " + filename + " " + remote)
            self.sudo("rm " + filename)
        else:
            self.conn.put(File(local).abspath, remote)
Example #2
0
    def _scan_database(self):
        # GET ALL RELATIONS
        raw_relations = self.db.query(
            """
            SELECT
                table_schema,
                table_name,
                referenced_table_schema,
                referenced_table_name,
                referenced_column_name,
                constraint_name,
                column_name,
                ordinal_position
            FROM
                information_schema.key_column_usage
            WHERE
                referenced_column_name IS NOT NULL
            """,
            param=self.settings.database,
        )

        if not raw_relations:
            Log.error("No relations in the database")

        for r in self.settings.add_relations:
            try:
                lhs, rhs = map(strings.trim, r.split("->"))
                lhs = lhs.split(".")
                if len(lhs) == 2:
                    lhs = [self.settings.database.schema] + lhs
                rhs = rhs.split(".")
                if len(rhs) == 2:
                    rhs = [self.settings.database.schema] + rhs
                to_add = Data(
                    ordinal_position=1,  # CAN ONLY HANDLE 1-COLUMN RELATIONS
                    table_schema=lhs[0],
                    table_name=lhs[1],
                    column_name=lhs[2],
                    referenced_table_schema=rhs[0],
                    referenced_table_name=rhs[1],
                    referenced_column_name=rhs[2],
                )

                # CHECK IF EXISTING
                if jx.filter(raw_relations, {"eq": to_add}):
                    Log.note("Relation {{relation}} already exists",
                             relation=r)
                    continue

                to_add.constraint_name = Random.hex(20)
                raw_relations.append(to_add)
            except Exception as e:
                Log.error("Could not parse {{line|quote}}", line=r, cause=e)

        relations = jx.select(
            raw_relations,
            [
                {
                    "name": "constraint.name",
                    "value": "constraint_name"
                },
                {
                    "name": "table.schema",
                    "value": "table_schema"
                },
                {
                    "name": "table.name",
                    "value": "table_name"
                },
                {
                    "name": "column.name",
                    "value": "column_name"
                },
                {
                    "name": "referenced.table.schema",
                    "value": "referenced_table_schema"
                },
                {
                    "name": "referenced.table.name",
                    "value": "referenced_table_name"
                },
                {
                    "name": "referenced.column.name",
                    "value": "referenced_column_name"
                },
                {
                    "name": "ordinal_position",
                    "value": "ordinal_position"
                },
            ],
        )

        # GET ALL TABLES
        raw_tables = self.db.query("""
            SELECT
                t.table_schema,
                t.table_name,
                c.constraint_name,
                c.constraint_type,
                k.column_name,
                k.ordinal_position
            FROM
                information_schema.tables t
            LEFT JOIN
                information_schema.table_constraints c on c.table_name=t.table_name AND c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY')
            LEFT JOIN
                information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema
            ORDER BY
                t.table_schema,
                t.table_name,
                c.constraint_name,
                k.ordinal_position,
                k.column_name
            """)

        # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
        tables = UniqueIndex(keys=["name", "schema"])
        for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
            c = wrap(list(c))
            best_index = Null
            is_referenced = False
            is_primary = False
            for g, w in jx.groupby(c, "constraint_name"):
                if not g.constraint_name:
                    continue
                w = list(w)
                ref = False
                for r in relations:
                    if (r.table.name == t.table_name
                            and r.table.schema == t.table_schema
                            and r.constraint.name == g.constraint_name):
                        ref = True
                is_prime = w[0].constraint_type == "PRIMARY"

                reasons_this_one_is_better = [
                    best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                    is_prime
                    and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                    is_primary == is_prime and ref and
                    not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                    is_primary == is_prime and ref == is_referenced and len(w)
                    < len(best_index),  # THE SHORTER THE TUPLE, THE BETTER
                ]
                if any(reasons_this_one_is_better):
                    is_primary = is_prime
                    is_referenced = ref
                    best_index = w

            tables.add({
                "name": t.table_name,
                "schema": t.table_schema,
                "id": [b.column_name for b in best_index],
            })

        fact_table = tables[self.settings.fact_table,
                            self.settings.database.schema]
        ids_table = {
            "alias": "t0",
            "name": "__ids__",
            "schema": fact_table.schema,
            "id": fact_table.id,
        }
        relations.extend(
            wrap({
                "constraint": {
                    "name": "__link_ids_to_fact_table__"
                },
                "table": ids_table,
                "column": {
                    "name": c
                },
                "referenced": {
                    "table": fact_table,
                    "column": {
                        "name": c
                    }
                },
                "ordinal_position": i,
            }) for i, c in enumerate(fact_table.id))
        tables.add(ids_table)

        # GET ALL COLUMNS
        raw_columns = self.db.query("""
            SELECT
                column_name,
                table_schema,
                table_name,
                ordinal_position,
                data_type
            FROM
                information_schema.columns
            """)

        reference_only_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 2
        ]
        reference_all_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 1
        ]
        foreign_column_table_schema_triples = {(r.column.name, r.table.name,
                                                r.table.schema)
                                               for r in relations}
        referenced_column_table_schema_triples = {(
            r.referenced.column.name,
            r.referenced.table.name,
            r.referenced.table.schema,
        )
                                                  for r in relations}
        related_column_table_schema_triples = (
            foreign_column_table_schema_triples
            | referenced_column_table_schema_triples)

        columns = UniqueIndex(["column.name", "table.name", "table.schema"])
        for c in raw_columns:
            if c.table_name in reference_only_tables:
                if c.table_name + "." + c.column_name in self.settings.reference_only:
                    include = True
                    reference = True
                    foreign = False
                elif c.column_name in tables[(c.table_name,
                                              c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = False
                    foreign = False
                else:
                    include = False
                    reference = False
                    foreign = False
            elif c.table_name in reference_all_tables:
                # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
                if c.column_name in tables[(c.table_name, c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = True
                    foreign = False
                elif (
                        c.column_name,
                        c.table_name,
                        c.table_schema,
                ) in foreign_column_table_schema_triples:
                    include = False
                    reference = False
                    foreign = True
                else:
                    include = True
                    reference = False
                    foreign = False
            elif c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            elif (
                    c.column_name,
                    c.table_name,
                    c.table_schema,
            ) in foreign_column_table_schema_triples:
                include = False
                reference = False
                foreign = True
            elif (
                    c.column_name,
                    c.table_name,
                    c.table_schema,
            ) in referenced_column_table_schema_triples:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            else:
                include = True
                reference = False
                foreign = False

            rel = {
                "column": {
                    "name": c.column_name,
                    "type": c.data_type
                },
                "table": {
                    "name": c.table_name,
                    "schema": c.table_schema
                },
                "ordinal_position": c.ordinal_position,
                "is_id": c.column_name
                in tables[(c.table_name, c.table_schema)].id,
                "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
                "reference":
                reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
                "foreign":
                foreign,  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
            }
            columns.add(rel)

        # ITERATE OVER ALL PATHS
        todo = FlatList()
        output_columns = FlatList()
        nested_path_to_join = {}
        all_nested_paths = [["."]]

        def follow_paths(position, path, nested_path, done_relations,
                         no_nested_docs):
            if position.name in self.settings.exclude:
                return

            if self.path_not_allowed(path):
                return
            if DEBUG:
                Log.note("Trace {{path}}", path=path)
            if position.name != "__ids__":
                # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR WHEN IF IT FAILS)
                self.db.query(
                    ConcatSQL(
                        SQL_SELECT,
                        SQL_STAR,
                        SQL_FROM,
                        quote_column(position.schema, position.name),
                        SQL_LIMIT,
                        SQL_ONE,
                    ))

            if position.name in reference_all_tables:
                no_nested_docs = True
            if position.name in reference_only_tables:
                return
            curr_join_list = copy(nested_path_to_join[nested_path[0]])

            ###############################################################################
            # INNER OBJECTS
            ###############################################################################
            referenced_tables = list(
                sort_using_key(
                    jx.groupby(
                        jx.filter(
                            relations,
                            {
                                "eq": {
                                    "table.name": position.name,
                                    "table.schema": position.schema,
                                }
                            },
                        ),
                        "constraint.name",
                    ),
                    key=lambda p: first(p[1]).column.name,
                ))
            for g, constraint_columns in referenced_tables:
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                if any(cc for cc in constraint_columns
                       if cc.referenced.table.name in self.settings.exclude):
                    continue

                done_relations.add(g["constraint.name"])

                many_to_one_joins = nested_path_to_join[nested_path[0]]
                index = len(many_to_one_joins)

                alias = "t" + text(index)
                for c in constraint_columns:
                    c.referenced.table.alias = alias
                    c.table = position
                many_to_one_joins.append({
                    "join_columns": constraint_columns,
                    "path": path,
                    "nested_path": nested_path,
                })

                # HANDLE THE COMMON *id SUFFIX
                name = []
                for cname, tname in zip(
                        constraint_columns.column.name,
                        constraint_columns.referenced.table.name,
                ):
                    if cname.startswith(tname):
                        name.append(tname)
                    elif cname.endswith("_id"):
                        name.append(cname[:-3])
                    else:
                        name.append(cname)

                relation_string = many_to_one_string(constraint_columns[0])
                step = "/".join(name)
                if len(constraint_columns) == 1:
                    step = self.name_relations.get(relation_string, step)

                referenced_column_path = concat_field(path, step)
                if self.path_not_allowed(referenced_column_path):
                    continue

                if referenced_column_path in reference_only_tables:
                    continue

                col_pointer_name = relative_field(referenced_column_path,
                                                  nested_path[0])
                for col in columns:
                    if (col.table.name
                            == constraint_columns[0].referenced.table.name
                            and col.table.schema
                            == constraint_columns[0].referenced.table.schema):
                        col_full_name = concat_field(
                            col_pointer_name, literal_field(col.column.name))

                        if (col.is_id and len(nested_path) == 1
                                and col.table.name == fact_table.name
                                and col.table.schema == fact_table.schema):
                            # ALWAYS SHOW THE ID OF THE FACT
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                True,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name,
                            })
                        elif col.column.name == constraint_columns[
                                0].column.name:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None,
                            })
                        elif col.is_id:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None,
                            })
                        elif col.reference:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_pointer_name
                                if not self.settings.show_foreign_keys else
                                col_full_name,  # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                            })
                        elif col.include:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name,
                            })

                if position.name in reference_only_tables:
                    continue

                todo.append(
                    Data(
                        position=copy(constraint_columns[0].referenced.table),
                        path=referenced_column_path,
                        nested_path=nested_path,
                        done_relations=copy(done_relations),
                        no_nested_docs=no_nested_docs,
                    ))
            ###############################################################################
            # NESTED OBJECTS
            ###############################################################################
            if not no_nested_docs:
                nesting_tables = list(
                    sort_using_key(
                        jx.groupby(
                            jx.filter(
                                relations,
                                {
                                    "eq": {
                                        "referenced.table.name": position.name,
                                        "referenced.table.schema":
                                        position.schema,
                                    }
                                },
                            ),
                            "constraint.name",
                        ),
                        key=lambda p: [(r.table.name, r.column.name)
                                       for r in [first(p[1])]][0],
                    ))

                for g, constraint_columns in nesting_tables:
                    g = unwrap(g)
                    constraint_columns = deepcopy(constraint_columns)
                    if g["constraint.name"] in done_relations:
                        continue
                    done_relations.add(g["constraint.name"])

                    many_table = set(constraint_columns.table.name)
                    if not (many_table - self.settings.exclude):
                        continue

                    relation_string = one_to_many_string(constraint_columns[0])
                    step = "/".join(many_table)
                    if len(constraint_columns) == 1:
                        step = self.name_relations.get(relation_string, step)

                    referenced_column_path = concat_field(path, step)
                    if self.path_not_allowed(referenced_column_path):
                        continue

                    new_nested_path = [referenced_column_path] + nested_path
                    all_nested_paths.append(new_nested_path)

                    if referenced_column_path in nested_path_to_join:
                        Log.error(
                            "{{path}} already exists, try adding entry to name_relations",
                            path=referenced_column_path,
                        )
                    one_to_many_joins = nested_path_to_join[
                        referenced_column_path] = copy(curr_join_list)
                    index = len(one_to_many_joins)
                    alias = "t" + text(index)
                    for c in constraint_columns:
                        c.table.alias = alias
                        c.referenced.table = position
                    one_to_many_joins.append(
                        set_default(
                            {},
                            g,
                            {
                                "children": True,
                                "join_columns": constraint_columns,
                                "path": path,
                                "nested_path": nested_path,
                            },
                        ))
                    for col in columns:
                        if (col.table.name == constraint_columns[0].table.name
                                and col.table.schema
                                == constraint_columns[0].table.schema):
                            col_full_name = join_field(
                                split_field(referenced_column_path)
                                [len(split_field(new_nested_path[0])):] +
                                [literal_field(col.column.name)])

                            if col.column.name == constraint_columns[
                                    0].column.name:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None,
                                })
                            elif col.is_id:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None,
                                })
                            else:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if col.include else None,
                                })

                    todo.append(
                        Data(
                            position=constraint_columns[0].table,
                            path=referenced_column_path,
                            nested_path=new_nested_path,
                            done_relations=copy(done_relations),
                            no_nested_docs=no_nested_docs,
                        ))

        path = "."
        nested_path = [path]
        nested_path_to_join["."] = [{
            "path":
            path,
            "join_columns": [{
                "referenced": {
                    "table": ids_table
                }
            }],
            "nested_path":
            nested_path,
        }]

        todo.append(
            Data(
                position=ids_table,
                path=path,
                nested_path=nested_path,
                done_relations=set(),
                no_nested_docs=False,
            ))

        while todo:
            item = todo.pop(0)
            follow_paths(**item)

        self.all_nested_paths = all_nested_paths
        self.nested_path_to_join = nested_path_to_join
        self.columns = output_columns
Example #3
0
    def _install_es(self, gigabytes):
        volumes = self.instance.markup.drives

        if not fabric_files.exists("/usr/local/elasticsearch"):
            with cd("/home/ec2-user/"):
                run("mkdir -p temp")

            if not File(LOCAL_JRE).exists:
                Log.error("Expecting {{file}} on manager to spread to ES instances", file=LOCAL_JRE)
            with cd("/home/ec2-user/temp"):
                run('rm -f '+JRE)
                put("resources/"+JRE, JRE)
                sudo("rpm -i "+JRE)
                sudo("alternatives --install /usr/bin/java java /usr/java/default/bin/java 20000")
                run("export JAVA_HOME=/usr/java/default")

            with cd("/home/ec2-user/"):
                run('wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.1.tar.gz')
                run('tar zxfv elasticsearch-1.7.1.tar.gz')
                sudo('mkdir /usr/local/elasticsearch')
                sudo('cp -R elasticsearch-1.7.1/* /usr/local/elasticsearch/')

            with cd('/usr/local/elasticsearch/'):
                # BE SURE TO MATCH THE PLUGLIN WITH ES VERSION
                # https://github.com/elasticsearch/elasticsearch-cloud-aws
                sudo('bin/plugin -install elasticsearch/elasticsearch-cloud-aws/2.7.1')

            #REMOVE THESE FILES, WE WILL REPLACE THEM WITH THE CORRECT VERSIONS AT THE END
            sudo("rm -f /usr/local/elasticsearch/config/elasticsearch.yml")
            sudo("rm -f /usr/local/elasticsearch/bin/elasticsearch.in.sh")

        self.conn = self.instance.connection

        # MOUNT AND FORMAT THE EBS VOLUMES (list with `lsblk`)
        for i, k in enumerate(volumes):
            if not fabric_files.exists(k.path):
                with fabric_settings(warn_only=True):
                    sudo('sudo umount '+k.device)

                sudo('yes | sudo mkfs -t ext4 '+k.device)
                sudo('mkdir '+k.path)
                sudo('sudo mount '+k.device+' '+k.path)

                #ADD TO /etc/fstab SO AROUND AFTER REBOOT
                sudo("sed -i '$ a\\"+k.device+"   "+k.path+"       ext4    defaults,nofail  0   2' /etc/fstab")

        # TEST IT IS WORKING
        sudo('mount -a')

        # INCREASE THE FILE HANDLE LIMITS
        with cd("/home/ec2-user/"):
            File("./results/temp/sysctl.conf").delete()
            get("/etc/sysctl.conf", "./results/temp/sysctl.conf", use_sudo=True)
            lines = File("./results/temp/sysctl.conf").read()
            if lines.find("fs.file-max = 100000") == -1:
                lines += "\nfs.file-max = 100000"
            lines = lines.replace("net.bridge.bridge-nf-call-ip6tables = 0", "")
            lines = lines.replace("net.bridge.bridge-nf-call-iptables = 0", "")
            lines = lines.replace("net.bridge.bridge-nf-call-arptables = 0", "")
            File("./results/temp/sysctl.conf").write(lines)
            put("./results/temp/sysctl.conf", "/etc/sysctl.conf", use_sudo=True)

        sudo("sysctl -p")

        # INCREASE FILE HANDLE PERMISSIONS
        sudo("sed -i '$ a\\root soft nofile 50000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\root hard nofile 100000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\root memlock unlimited' /etc/security/limits.conf")

        sudo("sed -i '$ a\\ec2-user soft nofile 50000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\ec2-user hard nofile 100000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\ec2-user memlock unlimited' /etc/security/limits.conf")

        # EFFECTIVE LOGIN TO LOAD CHANGES TO FILE HANDLES
        # sudo("sudo -i -u ec2-user")

        if not fabric_files.exists("/data1/logs"):
            sudo('mkdir /data1/logs')
            sudo('mkdir /data1/heapdump')

            #INCREASE NUMBER OF FILE HANDLES
            # sudo("sysctl -w fs.file-max=64000")
        # COPY CONFIG FILE TO ES DIR
        if not fabric_files.exists("/usr/local/elasticsearch/config/elasticsearch.yml"):
            yml = File("./examples/config/es_config.yml").read().replace("\r", "")
            yml = expand_template(yml, {
                "id": Random.hex(length=8),
                "data_paths": ",".join("/data"+unicode(i+1) for i, _ in enumerate(volumes))
            })
            File("./results/temp/elasticsearch.yml").write(yml)
            put("./results/temp/elasticsearch.yml", '/usr/local/elasticsearch/config/elasticsearch.yml', use_sudo=True)

        # FOR SOME REASON THE export COMMAND DOES NOT SEEM TO WORK
        # THIS SCRIPT SETS THE ES_MIN_MEM/ES_MAX_MEM EXPLICITLY
        if not fabric_files.exists("/usr/local/elasticsearch/bin/elasticsearch.in.sh"):
            sh = File("./examples/config/es_run.sh").read().replace("\r", "")
            sh = expand_template(sh, {"memory": unicode(int(gigabytes/2))})
            File("./results/temp/elasticsearch.in.sh").write(sh)
            with cd("/home/ec2-user"):
                put("./results/temp/elasticsearch.in.sh", './temp/elasticsearch.in.sh', use_sudo=True)
                sudo("cp -f ./temp/elasticsearch.in.sh /usr/local/elasticsearch/bin/elasticsearch.in.sh")
Example #4
0
def random_id():
    return Random.hex(40)
Example #5
0
    def _scan_database(self):
        # GET ALL RELATIONS
        raw_relations = self.db.query("""
            SELECT
                table_schema,
                table_name,
                referenced_table_schema,
                referenced_table_name,
                referenced_column_name,
                constraint_name,
                column_name,
                ordinal_position
            FROM
                information_schema.key_column_usage
            WHERE
                referenced_column_name IS NOT NULL
        """,
                                      param=self.settings.database)

        if not raw_relations:
            Log.error("No relations in the database")

        for r in self.settings.add_relations:
            try:
                a, b = map(strings.trim, r.split("->"))
                a = a.split(".")
                b = b.split(".")
                raw_relations.append(
                    Data(table_schema=a[0],
                         table_name=a[1],
                         referenced_table_schema=b[0],
                         referenced_table_name=b[1],
                         referenced_column_name=b[2],
                         constraint_name=Random.hex(20),
                         column_name=a[2],
                         ordinal_position=1))
            except Exception as e:
                Log.error("Could not parse {{line|quote}}", line=r, cause=e)

        relations = jx.select(raw_relations,
                              [{
                                  "name": "constraint.name",
                                  "value": "constraint_name"
                              }, {
                                  "name": "table.schema",
                                  "value": "table_schema"
                              }, {
                                  "name": "table.name",
                                  "value": "table_name"
                              }, {
                                  "name": "column.name",
                                  "value": "column_name"
                              }, {
                                  "name": "referenced.table.schema",
                                  "value": "referenced_table_schema"
                              }, {
                                  "name": "referenced.table.name",
                                  "value": "referenced_table_name"
                              }, {
                                  "name": "referenced.column.name",
                                  "value": "referenced_column_name"
                              }, {
                                  "name": "ordinal_position",
                                  "value": "ordinal_position"
                              }])

        # GET ALL TABLES
        raw_tables = self.db.query("""
            SELECT
                t.table_schema,
                t.table_name,
                c.constraint_name,
                c.constraint_type,
                k.column_name,
                k.ordinal_position
            FROM
                information_schema.tables t
            LEFT JOIN
                information_schema.table_constraints c on c.table_name=t.table_name AND c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY')
            LEFT JOIN
                information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema
            ORDER BY
                t.table_schema,
                t.table_name,
                c.constraint_name,
                k.ordinal_position,
                k.column_name
        """,
                                   param=self.settings.database)

        # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
        tables = UniqueIndex(keys=["name", "schema"])
        for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
            c = wrap(list(c))
            best_index = Null
            is_referenced = False
            is_primary = False
            for g, w in jx.groupby(c, "constraint_name"):
                if not g.constraint_name:
                    continue
                w = list(w)
                ref = False
                for r in relations:
                    if r.table.name == t.table_name and r.table.schema == t.table_schema and r.constraint.name == g.constraint_name:
                        ref = True
                is_prime = w[0].constraint_type == "PRIMARY"

                reasons_this_one_is_better = [
                    best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                    is_prime
                    and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                    is_primary == is_prime and ref and
                    not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                    is_primary == is_prime and ref == is_referenced and len(w)
                    < len(best_index)  # THE SHORTER THE TUPLE, THE BETTER
                ]
                if any(reasons_this_one_is_better):
                    is_primary = is_prime
                    is_referenced = ref
                    best_index = w

            tables.add({
                "name": t.table_name,
                "schema": t.table_schema,
                "id": [b.column_name for b in best_index]
            })

        fact_table = tables[self.settings.fact_table,
                            self.settings.database.schema]
        ids_table = {
            "alias": "t0",
            "name": "__ids__",
            "schema": fact_table.schema,
            "id": fact_table.id
        }
        relations.extend(
            wrap({
                "constraint": {
                    "name": "__link_ids_to_fact_table__"
                },
                "table": ids_table,
                "column": {
                    "name": c
                },
                "referenced": {
                    "table": fact_table,
                    "column": {
                        "name": c
                    }
                },
                "ordinal_position": i
            }) for i, c in enumerate(fact_table.id))
        tables.add(ids_table)

        # GET ALL COLUMNS
        raw_columns = self.db.query("""
            SELECT
                column_name,
                table_schema,
                table_name,
                ordinal_position,
                data_type
            FROM
                information_schema.columns
        """,
                                    param=self.settings.database)

        reference_only_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 2
        ]
        reference_all_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 1
        ]
        foreign_column_table_schema_triples = {(r.column.name, r.table.name,
                                                r.table.schema)
                                               for r in relations}
        referenced_column_table_schema_triples = {
            (r.referenced.column.name, r.referenced.table.name,
             r.referenced.table.schema)
            for r in relations
        }
        related_column_table_schema_triples = foreign_column_table_schema_triples | referenced_column_table_schema_triples

        columns = UniqueIndex(["column.name", "table.name", "table.schema"])
        for c in raw_columns:
            if c.table_name in reference_only_tables:
                if c.table_name + "." + c.column_name in self.settings.reference_only:
                    include = True
                    reference = True
                    foreign = False
                elif c.column_name in tables[(c.table_name,
                                              c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = False
                    foreign = False
                else:
                    include = False
                    reference = False
                    foreign = False
            elif c.table_name in reference_all_tables:
                # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
                if c.column_name in tables[(c.table_name, c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = True
                    foreign = False
                elif (c.column_name, c.table_name,
                      c.table_schema) in foreign_column_table_schema_triples:
                    include = False
                    reference = False
                    foreign = True
                else:
                    include = True
                    reference = False
                    foreign = False
            elif c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            elif (c.column_name, c.table_name,
                  c.table_schema) in foreign_column_table_schema_triples:
                include = False
                reference = False
                foreign = True
            elif (c.column_name, c.table_name,
                  c.table_schema) in referenced_column_table_schema_triples:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            else:
                include = True
                reference = False
                foreign = False

            rel = {
                "column": {
                    "name": c.column_name,
                    "type": c.data_type
                },
                "table": {
                    "name": c.table_name,
                    "schema": c.table_schema
                },
                "ordinal_position": c.ordinal_position,
                "is_id": c.column_name
                in tables[(c.table_name, c.table_schema)].id,
                "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
                "reference":
                reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
                "foreign": foreign  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
            }
            columns.add(rel)

        # ITERATE OVER ALL PATHS
        todo = FlatList()
        output_columns = FlatList()
        nested_path_to_join = {}
        all_nested_paths = [["."]]

        def follow_paths(position, path, nested_path, done_relations,
                         no_nested_docs):
            if position.name in self.settings.exclude:
                return
            if DEBUG:
                Log.note("Trace {{path}}", path=path)
            if position.name != "__ids__":
                # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR WHEN IF IT FAILS)
                self.db.query("SELECT * FROM " +
                              quote_column(position.name, position.schema) +
                              " LIMIT 1")

            if position.name in reference_all_tables:
                no_nested_docs = True
            if position.name in reference_only_tables:
                return

            curr_join_list = copy(nested_path_to_join[nested_path[0]])

            # INNER OBJECTS
            referenced_tables = list(
                jx.groupby(
                    jx.filter(
                        relations, {
                            "eq": {
                                "table.name": position.name,
                                "table.schema": position.schema
                            }
                        }), "constraint.name"))
            for g, constraint_columns in referenced_tables:
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                if any(cc for cc in constraint_columns
                       if cc.referenced.table.name in self.settings.exclude):
                    continue

                done_relations.add(g["constraint.name"])

                many_to_one_joins = nested_path_to_join[nested_path[0]]
                index = len(many_to_one_joins)

                alias = "t" + text_type(index)
                for c in constraint_columns:
                    c.referenced.table.alias = alias
                    c.table = position
                many_to_one_joins.append({
                    "join_columns": constraint_columns,
                    "path": path,
                    "nested_path": nested_path
                })

                # referenced_table_path = join_field(split_field(path) + ["/".join(constraint_columns.referenced.table.name)])
                # HANDLE THE COMMON *id SUFFIX
                name = []
                for a, b in zip(constraint_columns.column.name,
                                constraint_columns.referenced.table.name):
                    if a.startswith(b):
                        name.append(b)
                    elif a.endswith("_id"):
                        name.append(a[:-3])
                    else:
                        name.append(a)
                referenced_column_path = join_field(
                    split_field(path) + ["/".join(name)])
                col_pointer_name = relative_field(referenced_column_path,
                                                  nested_path[0])
                # insert into nested1 VALUES (100, 10, 'aaa', -1);
                # id.about.time.nested1 .ref=10
                # id.about.time.nested1 .ref.name
                for col in columns:
                    if col.table.name == constraint_columns[
                            0].referenced.table.name and col.table.schema == constraint_columns[
                                0].referenced.table.schema:
                        col_full_name = concat_field(
                            col_pointer_name, literal_field(col.column.name))

                        if col.is_id and col.table.name == fact_table.name and col.table.schema == fact_table.schema:
                            # ALWAYS SHOW THE ID OF THE FACT
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                True,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                            })
                        elif col.column.name == constraint_columns[
                                0].column.name:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None
                            })
                        elif col.is_id:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None
                            })
                        elif col.reference:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_pointer_name
                                if not self.settings.show_foreign_keys else
                                col_full_name  # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                            })
                        elif col.include:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                            })

                if position.name in reference_only_tables:
                    continue

                todo.append(
                    Data(position=copy(constraint_columns[0].referenced.table),
                         path=referenced_column_path,
                         nested_path=nested_path,
                         done_relations=copy(done_relations),
                         no_nested_docs=no_nested_docs))

            # NESTED OBJECTS
            if not no_nested_docs:
                for g, constraint_columns in jx.groupby(
                        jx.filter(
                            relations, {
                                "eq": {
                                    "referenced.table.name": position.name,
                                    "referenced.table.schema": position.schema
                                }
                            }), "constraint.name"):
                    g = unwrap(g)
                    constraint_columns = deepcopy(constraint_columns)
                    if g["constraint.name"] in done_relations:
                        continue
                    done_relations.add(g["constraint.name"])

                    many_table = set(constraint_columns.table.name)
                    if not (many_table - self.settings.exclude):
                        continue

                    referenced_column_path = join_field(
                        split_field(path) + ["/".join(many_table)])
                    new_nested_path = [referenced_column_path] + nested_path
                    all_nested_paths.append(new_nested_path)

                    # if new_path not in self.settings.include:
                    #     Log.note("Exclude nested path {{path}}", path=new_path)
                    #     continue
                    one_to_many_joins = nested_path_to_join[
                        referenced_column_path] = copy(curr_join_list)
                    index = len(one_to_many_joins)
                    alias = "t" + text_type(index)
                    for c in constraint_columns:
                        c.table.alias = alias
                        c.referenced.table = position
                    one_to_many_joins.append(
                        set_default({}, g, {
                            "children": True,
                            "join_columns": constraint_columns,
                            "path": path,
                            "nested_path": nested_path
                        }))
                    # insert into nested1 VALUES (100, 10, 'aaa', -1); # id.about.time.nested1 .ref=10# id.about.time.nested1 .ref.name
                    for col in columns:
                        if col.table.name == constraint_columns[
                                0].table.name and col.table.schema == constraint_columns[
                                    0].table.schema:
                            col_full_name = join_field(
                                split_field(referenced_column_path)
                                [len(split_field(new_nested_path[0])):] +
                                [literal_field(col.column.name)])

                            if col.column.name == constraint_columns[
                                    0].column.name:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None
                                })
                            elif col.is_id:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None
                                })
                            else:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if col.include else None
                                })

                    todo.append(
                        Data(position=constraint_columns[0].table,
                             path=referenced_column_path,
                             nested_path=new_nested_path,
                             done_relations=copy(done_relations),
                             no_nested_docs=no_nested_docs))

        path = "."
        nested_path = [path]
        nested_path_to_join["."] = [{
            "path":
            path,
            "join_columns": [{
                "referenced": {
                    "table": ids_table
                }
            }],
            "nested_path":
            nested_path
        }]

        todo.append(
            Data(position=ids_table,
                 path=path,
                 nested_path=nested_path,
                 done_relations=set(),
                 no_nested_docs=False))

        while todo:
            item = todo.pop(0)
            follow_paths(**item)

        self.all_nested_paths = all_nested_paths
        self.nested_path_to_join = nested_path_to_join
        self.columns = output_columns
Example #6
0
    def _install_es(self, gigabytes):
        volumes = self.instance.markup.drives

        if not fabric_files.exists("/usr/local/elasticsearch"):
            with cd("/home/ec2-user/"):
                run("mkdir -p temp")

            if not File(LOCAL_JRE).exists:
                Log.error(
                    "Expecting {{file}} on manager to spread to ES instances",
                    file=LOCAL_JRE)
            with cd("/home/ec2-user/temp"):
                run('rm -f ' + JRE)
                put("resources/" + JRE, JRE)
                sudo("rpm -i " + JRE)
                sudo(
                    "alternatives --install /usr/bin/java java /usr/java/default/bin/java 20000"
                )
                run("export JAVA_HOME=/usr/java/default")

            with cd("/home/ec2-user/"):
                run('wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.1.tar.gz'
                    )
                run('tar zxfv elasticsearch-1.7.1.tar.gz')
                sudo('mkdir /usr/local/elasticsearch')
                sudo('cp -R elasticsearch-1.7.1/* /usr/local/elasticsearch/')

            with cd('/usr/local/elasticsearch/'):
                # BE SURE TO MATCH THE PLUGLIN WITH ES VERSION
                # https://github.com/elasticsearch/elasticsearch-cloud-aws
                sudo(
                    'bin/plugin -install elasticsearch/elasticsearch-cloud-aws/2.7.1'
                )

            #REMOVE THESE FILES, WE WILL REPLACE THEM WITH THE CORRECT VERSIONS AT THE END
            sudo("rm -f /usr/local/elasticsearch/config/elasticsearch.yml")
            sudo("rm -f /usr/local/elasticsearch/bin/elasticsearch.in.sh")

        self.conn = self.instance.connection

        # MOUNT AND FORMAT THE EBS VOLUMES (list with `lsblk`)
        for i, k in enumerate(volumes):
            if not fabric_files.exists(k.path):
                with fabric_settings(warn_only=True):
                    sudo('sudo umount ' + k.device)

                sudo('yes | sudo mkfs -t ext4 ' + k.device)
                sudo('mkdir ' + k.path)
                sudo('sudo mount ' + k.device + ' ' + k.path)

                #ADD TO /etc/fstab SO AROUND AFTER REBOOT
                sudo("sed -i '$ a\\" + k.device + "   " + k.path +
                     "       ext4    defaults,nofail  0   2' /etc/fstab")

        # TEST IT IS WORKING
        sudo('mount -a')

        # INCREASE THE FILE HANDLE LIMITS
        with cd("/home/ec2-user/"):
            File("./results/temp/sysctl.conf").delete()
            get("/etc/sysctl.conf",
                "./results/temp/sysctl.conf",
                use_sudo=True)
            lines = File("./results/temp/sysctl.conf").read()
            if lines.find("fs.file-max = 100000") == -1:
                lines += "\nfs.file-max = 100000"
            lines = lines.replace("net.bridge.bridge-nf-call-ip6tables = 0",
                                  "")
            lines = lines.replace("net.bridge.bridge-nf-call-iptables = 0", "")
            lines = lines.replace("net.bridge.bridge-nf-call-arptables = 0",
                                  "")
            File("./results/temp/sysctl.conf").write(lines)
            put("./results/temp/sysctl.conf",
                "/etc/sysctl.conf",
                use_sudo=True)

        sudo("sysctl -p")

        # INCREASE FILE HANDLE PERMISSIONS
        sudo("sed -i '$ a\\root soft nofile 50000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\root hard nofile 100000' /etc/security/limits.conf")
        sudo("sed -i '$ a\\root memlock unlimited' /etc/security/limits.conf")

        sudo(
            "sed -i '$ a\\ec2-user soft nofile 50000' /etc/security/limits.conf"
        )
        sudo(
            "sed -i '$ a\\ec2-user hard nofile 100000' /etc/security/limits.conf"
        )
        sudo(
            "sed -i '$ a\\ec2-user memlock unlimited' /etc/security/limits.conf"
        )

        # EFFECTIVE LOGIN TO LOAD CHANGES TO FILE HANDLES
        # sudo("sudo -i -u ec2-user")

        if not fabric_files.exists("/data1/logs"):
            sudo('mkdir /data1/logs')
            sudo('mkdir /data1/heapdump')

            #INCREASE NUMBER OF FILE HANDLES
            # sudo("sysctl -w fs.file-max=64000")
        # COPY CONFIG FILE TO ES DIR
        if not fabric_files.exists(
                "/usr/local/elasticsearch/config/elasticsearch.yml"):
            yml = File("./examples/config/es_config.yml").read().replace(
                "\r", "")
            yml = expand_template(
                yml, {
                    "id":
                    Random.hex(length=8),
                    "data_paths":
                    ",".join("/data" + unicode(i + 1)
                             for i, _ in enumerate(volumes))
                })
            File("./results/temp/elasticsearch.yml").write(yml)
            put("./results/temp/elasticsearch.yml",
                '/usr/local/elasticsearch/config/elasticsearch.yml',
                use_sudo=True)

        # FOR SOME REASON THE export COMMAND DOES NOT SEEM TO WORK
        # THIS SCRIPT SETS THE ES_MIN_MEM/ES_MAX_MEM EXPLICITLY
        if not fabric_files.exists(
                "/usr/local/elasticsearch/bin/elasticsearch.in.sh"):
            sh = File("./examples/config/es_run.sh").read().replace("\r", "")
            sh = expand_template(sh, {"memory": unicode(int(gigabytes / 2))})
            File("./results/temp/elasticsearch.in.sh").write(sh)
            with cd("/home/ec2-user"):
                put("./results/temp/elasticsearch.in.sh",
                    './temp/elasticsearch.in.sh',
                    use_sudo=True)
                sudo(
                    "cp -f ./temp/elasticsearch.in.sh /usr/local/elasticsearch/bin/elasticsearch.in.sh"
                )
Example #7
0
def random_id():
    return Random.hex(40)