def main():
    '''
    Main routine.

    Returns:
        Certificate path facts to the invoking Ansible play (dict), e.g.:

            "msacds_certreq_facts": {
                "cert_full_path": "/tmp/ansiblehost.mydomain.com.p7b",
                "err": null
            },
            "msg": "200:Success"
    '''
    spec = ArgumentSpec()
    module = AnsibleModule(
        argument_spec=spec.argument_spec,
        supports_check_mode=spec.supports_check_mode
    )
    try:
        user = module.params['user']
        credential_cachepath = module.params['credential_cachepath']
        # Run the module inside a Kerberos context bound to the caller's
        # principal and credential cache.
        with krbContext(principal=user,
                        ccache_file=credential_cachepath):
            results = _exec_module(module)
            module.exit_json(changed=True,
                             msacds_certreq_facts=results,
                             msg='200:Success')
    except Exception as ex:
        module.fail_json(msg='400:' + str(ex))
    def __init__(self,
                 transport,
                 host,
                 service,
                 mechanism=six.u('GSSAPI'),
                 generate_tickets=False,
                 using_keytab=False,
                 principal=None,
                 keytab_file=None,
                 ccache_file=None,
                 password=None,
                 **sasl_kwargs):
        """
        transport: an underlying transport to use, typically just a TSocket
        host: the name of the server, from a SASL perspective
        service: the name of the server's service, from a SASL perspective
        mechanism: the name of the preferred mechanism to use
        All other kwargs will be passed to the puresasl.client.SASLClient
        constructor.
        """

        self.transport = transport

        if six.PY3:
            self._patch_pure_sasl()
        self.sasl = SASLClient(host, service, mechanism, **sasl_kwargs)

        self.__wbuf = BytesIO()
        self.__rbuf = BytesIO()

        self.generate_tickets = generate_tickets
        if self.generate_tickets:
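            # Eagerly obtain a ticket from the keytab via krbContext so the
            # credential cache is populated before the SASL/GSSAPI handshake.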
            self.krb_context = krbContext(using_keytab, principal, keytab_file,
                                          ccache_file, password)
            self.krb_context.init_with_keytab()
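
A side sketch (not part of the original snippet): the last three lines mirror how krbContext can also be used eagerly, outside a with-statement, by calling init_with_keytab() directly. The principal and keytab path below are placeholders.

from krbcontext import krbContext

ctx = krbContext(using_keytab=True,
                 principal='service/host.example.com@EXAMPLE.COM',    # placeholder
                 keytab_file='/etc/security/keytabs/service.keytab')  # placeholder
ctx.init_with_keytab()  # roughly `kinit -kt`: fills the credential cache
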
Example #3
def hdfs_connect_demo():

    # NOTE: under the hood krbContext runs the equivalent of kinit
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file='/houleilei.client.keytab'):
        client = KerberosClient('http://hadoop01.stor:50070',
                                hostname_override='hadoop01.stor')
        # client = InsecureClient('http://hadoop01.stor:50070', user='******')
        result = client.list('/home/holyzing/')
        print(type(result), result)
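
A hedged follow-on sketch (not from the original listing): once the ticket is in place, the same KerberosClient can also read file contents, not just list directories. The file path is a placeholder.

from hdfs.ext.kerberos import KerberosClient
from krbcontext import krbContext

def hdfs_read_demo():
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file='/houleilei.client.keytab'):
        client = KerberosClient('http://hadoop01.stor:50070',
                                hostname_override='hadoop01.stor')
        # client.read() yields a context manager over the remote file stream
        with client.read('/home/holyzing/example.txt') as reader:  # placeholder path
            print(reader.read())
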
Example #4
def pandas_group():
    """
    从hbase 中读取数据后,使用pandas进行聚合统计
    """
    with krbContext(using_keytab=True, principal=principal,
                    keytab_file=keytab):
        conn = KerberosConnection('hbase02.stor',
                                  protocol='compact',
                                  use_kerberos=True)
        table = conn.table(cm_table)
        columns = ["project", "start_time", "end_time", "cpu", "gpu", "memory"]
        data = table.scan(columns=["infos:%s" % column for column in columns],
                          filter=None)
        # TODO: use an HBase filter string here instead, e.g.
        # "SingleColumnValueFilter('info','project',=,'binary:{0}')"
        df = pd.DataFrame(data=[d[1] for d in data])
        print(df.columns, len(df.index))
        df.columns = [str(column).split(":")[1][:-1] for column in df.columns]
        df["start_time"] = df["start_time"].apply(float)
        df["end_time"] = df["end_time"].apply(float)
        df["cpu"] = df["cpu"].apply(int)
        df["gpu"] = df["gpu"].apply(int)
        df["memory"] = df["memory"].apply(int)

        data = []
        project_groups = df.groupby("project", as_index=True)
        for project, group in project_groups.groups.items():
            project_df = df.loc[group]
            month_groups = project_df.groupby(
                by=lambda i: timestamp_to_datestr(
                    project_df.loc[i, "start_time"], project),
                as_index=True)
            df_sum = month_groups.sum()
            print(df_sum.index)
            for month, row in df_sum.iterrows():
                start_time = 0 if pd.isna(
                    row["start_time"]) else row["start_time"]
                end_time = 0 if pd.isna(row["end_time"]) else row["end_time"]
                time_long = (end_time - start_time) / 3600
                time_long = 0 if time_long <= 0 else time_long
                data.append([
                    str(project)[2:-1], month, time_long,
                    row["cpu"] * time_long, row["gpu"] * time_long,
                    row["memory"]
                ])
        data.sort(key=lambda x: (x[0], x[1]))
        columns = ["project", "month", "time", "cpu", "gpu", "memory"]
        df = pd.DataFrame(data=data, columns=columns)
        # Output file name: "monthly core-hour statistics for batch jobs"
        writer = pd.ExcelWriter("批任务核时月度统计.xls")
        df.to_excel(writer, sheet_name="batch_jobs_info", index=False)
        writer.save()
        writer.close()
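
The helper timestamp_to_datestr used above is not shown in this listing. A hypothetical sketch of what it could look like, assuming it buckets a Unix timestamp into a "YYYY-MM" month string (the project argument is only accepted because the caller passes it):

from datetime import datetime

def timestamp_to_datestr(timestamp, project=None):
    # Hypothetical implementation: group rows by the month of their start time
    return datetime.fromtimestamp(float(timestamp)).strftime("%Y-%m")
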
Example #5
def hbase_connect_demo():
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file='/houleilei.client.keytab'):

        connection = KerberosConnection('hbase02.stor',
                                        protocol='compact',
                                        use_kerberos=True)
        test_table = connection.table('houleilei:test')
        # insert
        test_table.put('row_key_1', {'f1:q1': 'v1'})
        # get data
        print(test_table.row('row_key_1'))
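
A related sketch (not in the original): happybase tables also accept a Thrift filter string on scan(), which is one way to approach the filter TODO in the pandas example above. KerberosConnection and krbContext are assumed to be imported as in the snippets above; the filter values are placeholders.

def hbase_scan_with_filter_demo():
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file='/houleilei.client.keytab'):
        connection = KerberosConnection('hbase02.stor',
                                        protocol='compact',
                                        use_kerberos=True)
        test_table = connection.table('houleilei:test')
        # Server-side filtering via the HBase Thrift filter language
        rows = test_table.scan(
            filter="SingleColumnValueFilter('f1', 'q1', =, 'binary:v1')")
        for key, data in rows:
            print(key, data)
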
Example #6
def read_csv_to_hbase():
    """
    read_csv_to_hbase
    :return:
    """
    columns = [
        "id", "name", "category", "project", "cluster", "creator", "datasets",
        "result", "status", "start_time", "end_time", "image", "git_url",
        "git_branch", "git_commit", "command", "cpu", "gpu", "spot", "memory",
        "gpu_model", "relation_report"
    ]
    df = pd.read_csv("/home/holyzing/Desktop/batchjob_info.csv",
                     delimiter=",",
                     header=None)
    df.columns = columns
    print(len(df.index))
    with krbContext(using_keytab=True, principal=principal,
                    keytab_file=keytab):
        conn = KerberosConnection('hbase02.stor',
                                  protocol='compact',
                                  use_kerberos=True)
        table = conn.table(cm_table)
        # batch = table.batch()
        # batch.put()
        for index, row in df.iterrows():
            data = {f"infos:{column}": str(row[column]) for column in columns}
            data["infos:cpu"] = re.search(r"\d+", data["infos:cpu"]).group()
            data["infos:gpu"] = re.search(r"\d+", data["infos:gpu"]).group()
            data["infos:memory"] = re.search(r"\d+",
                                             data["infos:memory"]).group()
            data["infos:spot"] = re.search(r"true|false",
                                           data["infos:spot"]).group()
            data["infos:gpu_model"] = data["infos:gpu_model"].split(
                ": ")[1][1:-3]
            table.put(str(index), data)
        conn.close()
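
A hedged variant of the write loop above (not from the original): happybase's Table.batch() can buffer the puts and flush them in chunks, which is what the commented-out batch lines hint at. This hypothetical helper reuses the table, DataFrame and column list from the function above.

def write_df_to_hbase_in_batches(table, df, columns, batch_size=1000):
    # Buffered writes: puts are flushed once batch_size mutations accumulate
    with table.batch(batch_size=batch_size) as batch:
        for index, row in df.iterrows():
            data = {f"infos:{column}": str(row[column]) for column in columns}
            batch.put(str(index), data)
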
Example #7
def spark_hbase():
    sc = SparkConf().setAppName("sparkHbase").setMaster("local[2]")
    sc.set("spark.cores.max", 8)
    sc.set("spark.executor.cores", 2)
    sc.set("spark.executor.memory", "1g")
    sc.set("spark.executor.pyspark.memory", "50m")

    # In cluster mode the executors probably also need the jars under the HBase lib directory
    # The netty jar shipped with HBase 1.4.13 is too old for Spark 2.4 and above
    # Mismatched jar versions across environments are a real headache

    # Has no effect in client mode
    sc.set("spark.driver.extraClassPath",
           "/home/holyzing/snap/apache/extrajars/*")
    # sc.set("spark.executor.extraClassPath", "/home/holyzing/snap/apache/hbase-1.4.13/lib/*")
    # sc.set("jars", ["/home/holyzing/snap/apache/extrajars/spark-examples_2.10-1.1.1.jar,\
    #                 /home/holyzing/snap/apache/hbase-1.4.13/lib/hbase-server-1.4.13.jar,\
    #                 /home/holyzing/snap/apache/hbase-1.4.13/lib/hbase-common-1.4.13.jar,\
    #                 /home/holyzing/snap/apache/hbase-1.4.13/lib/hbase-client-1.4.13.jar"])
    spark = SparkSession.builder.config(
        conf=sc).enableHiveSupport().getOrCreate()
    # spark = SparkContext(conf=sc)

    yz_quorum = "hadoop01:2181,hadoop02:2181,hadoop03:2181"
    yz_table, yz_pre = "default:job", "info"

    columns = ["project", "start_time", "end_time", "cpu", "gpu"]  # "memory"
    columns = [f"{yz_pre}:%s" % column for column in columns]

    hbase_conf = {
        "hbase.zookeeper.quorum": cm_quorum,
        "hbase.mapreduce.inputtable": cm_table,
        # "hbase.mapreduce.scan.row.start": '_',
        # "hbase.mapreduce.scan.row.stop": '_',
        # "hbase.mapreduce.scan.column.family": "info",
        # TODO: can scan.columns select only a single column? Omitting it does
        # not select all columns either; how is the column family configured?
        # TODO: none of these configuration keys seems to set a scan filter
        "hbase.mapreduce.scan.columns": "infos:project"  # " ".join(columns)
    }

    # In cluster mode the executors probably also need these two converter classes
    keyConv = "org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter"
    valueConv = "org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter"

    # help(spark.sparkContext.newAPIHadoopRDD)
    # TODO: the connection fails; Spark does not carry the Kerberos credentials over to HBase
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file=keytab):
        # In cluster mode the executors also need these two classes, not just the driver
        hbase_rdd: RDD = spark.sparkContext.newAPIHadoopRDD(
            "org.apache.hadoop.hbase.mapreduce.TableInputFormat",
            "org.apache.hadoop.hbase.io.ImmutableBytesWritable",
            "org.apache.hadoop.hbase.client.Result",
            keyConverter=keyConv,
            valueConverter=valueConv,
            conf=hbase_conf)

    # hbase_rdd.collect()
    # help(hbase_rdd)
    first_rdd = hbase_rdd.first()
    print(first_rdd)
    print(hbase_rdd.count())
    # hbase_rdd.foreach(print)

    # Mapping the HBase data to a Hive table (performance degrades, queries get slower)
    spark.stop()
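
One hedged idea for the credential TODO above (not verified against this cluster): hand the keytab and principal to Spark itself, so executors can log in on their own instead of relying on the driver-side ticket cache created by krbContext. The path and principal below are placeholders; on Spark 2.x the keys are spark.yarn.keytab/spark.yarn.principal, on Spark 3.x spark.kerberos.keytab/spark.kerberos.principal.

from pyspark import SparkConf

conf = SparkConf().setAppName("sparkHbaseKerberos")
conf.set("spark.yarn.keytab", "/path/to/user.keytab")   # placeholder path
conf.set("spark.yarn.principal", "user@REALM")          # placeholder principal
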
Example #8
import requests
import json
from requests_kerberos import HTTPKerberosAuth
from krbcontext import krbContext

API_ENDPOINT = 'http://10.0.10.133:8999/batches'

headers = {'Content-Type': 'application/json'}
with open('job.json') as job_file:
    json_data = json.load(job_file)

with krbContext(using_keytab=True,
                principal='*****@*****.**',
                keytab_file='/etc/security/keytabs/user.keytab',
                ccache_file='/home/user/yang/krb5cc_post'):

    r = requests.post(url=API_ENDPOINT,
                      data=json.dumps(json_data),
                      headers=headers,
                      verify=False,
                      auth=HTTPKerberosAuth())
    print(r)
    print(r.json())
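    # A hedged follow-up (not part of the original example): Livy's response to
    # POST /batches includes the batch id, which can be polled on the same
    # endpoint with the same Kerberos auth to watch the job state. Field names
    # follow Livy's documented REST API.
    batch_id = r.json()['id']
    status = requests.get(url=f"{API_ENDPOINT}/{batch_id}",
                          headers=headers,
                          auth=HTTPKerberosAuth())
    print(status.json().get('state'))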
Example #9
import requests
from krbcontext import krbContext

from requests_kerberos import HTTPKerberosAuth, OPTIONAL 

with krbContext(using_keytab=True,
                principal='INGESTION/asm.enigma.com',
                keytab_file='/var/lib/ingestion/ingestion.keytab'):
    pass
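# Note: since no ccache_file is given, the ticket obtained above is written to
# the default credential cache and is not destroyed when the context exits,
# which is why the requests.get() below can still authenticate.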

kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False)
r = requests.get("http://files.enigma.com/cities.csv", auth=kerberos_auth)
#r = requests.get("https://www.petmd.com/sites/default/files/petmd-cat-happy-10.jpg", auth=kerberos_auth)

print(r.status_code)
print(r.content)
import requests
import json
from requests_kerberos import HTTPKerberosAuth, REQUIRED
from krbcontext import krbContext

import logging
logging.basicConfig(filename='post.log', level=logging.DEBUG)

API_ENDPOINT = 'http://10.0.10.133:8999/batches'

headers = {
  'Content-Type': 'application/json'
}
with open('job.json') as job_file:
    json_data = json.load(job_file)

with krbContext(using_keytab=True,
                principal='liveadmin@HDPCLUSTER',
                keytab_file=r'C:\yang\liveadmin.keytab',
                ccache_file=r'C:\yang\krb5cc_post2'):
    kerberos_auth = HTTPKerberosAuth(principal="[email protected]:cloudera")
    r = requests.post(url=API_ENDPOINT,
                      data=json.dumps(json_data),
                      headers=headers,
                      verify=False,
                      auth=kerberos_auth)
    print(r.text)