Example #1
def save_mongo_py(file, author_id, filename):
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.mae
    try:
        data = open(file, encoding='utf-8')
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        for line in data:
            fileData.append(line)
        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

    except Exception as e:
        data = open(file, encoding='gbk')
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        for line in data:
            fileData.append(line)
        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

    object_id = db.insert(jsonData)
    object_id = string_type(object_id)
    client.close()
    data.close()
    os.remove(file)  # delete the locally saved file; no local copy is kept
    return object_id
Example #2
 def saveCodeToMongodb(self, code, configuration):
     """保存算法代码到mongodb"""
     mongoCli = cli.mark.algo
     OBJ = mongoCli.insert({"code": code, "configuration": configuration})
     OBJ_ID = string_type(OBJ)
     cli.close()
     return OBJ_ID
Example #3
 def saveModelToMongodb(self, model):
     """保存算法到mongodb"""
     mongoCli = cli.mark.models
     OBJ = mongoCli.insert({"model": model})
     OBJ_ID = string_type(OBJ)
     cli.close()
     return OBJ_ID
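Examples #2 and #3 assume a module-level pymongo client named `cli` and the `string_type` name that the pymongo/bson 3.x `bson.objectid` module re-exports (on Python 3 it is simply an alias for `str`; pymongo 4 removed it). A minimal, self-contained sketch of the same insert-and-stringify pattern, with a hypothetical connection URI and the non-deprecated `insert_one`:

import pymongo

try:
    # re-exported by bson.objectid in pymongo/bson 3.x; absent in pymongo 4+
    from bson.objectid import string_type
except ImportError:
    string_type = str  # on Python 3, string_type is just str

def save_code_to_mongodb(code, configuration, uri="mongodb://localhost:27017"):
    """Insert an algorithm document and return its ObjectId as a string."""
    client = pymongo.MongoClient(uri)            # hypothetical URI
    collection = client.mark.algo                # database "mark", collection "algo"
    result = collection.insert_one({"code": code, "configuration": configuration})
    obj_id = string_type(result.inserted_id)     # ObjectId -> str
    client.close()
    return obj_id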
Example #4
    def get(self, request):
        mg_client = pymongo.MongoClient(MONGO_DB_URI)
        db = mg_client.datahoop.data
        id = request.user.id
        try:
            all_file_id = request.GET.get('file_id')
            all_file_id = eval(all_file_id)
            obj = DataSource.objects
            try:
                for file_id in all_file_id:
                    fileName = DataSource.objects.get(id=file_id).file_name

                    if DataSource.objects.get(id=file_id).where == 'mongodb':
                        object_id = DataSource.objects.get(id=file_id).obj_id

                        data = db.find_one({'_id': ObjectId(object_id)})['fileData']
                        jsonData = {
                            'fileName': str(fileName),
                            'userID': id,
                            'fileData': data
                        }
                        object_id = db.insert(jsonData)
                        object_id = string_type(object_id)
                        mg_client.close()
                        obj.create(user_id=id, file_name=str(fileName), where='mongodb', obj_id=object_id)
                    else:
                        format_filename = DataSource.objects.get(id=file_id).format_filename
                        obj.create(user_id=id, file_name=str(fileName), format_filename=format_filename, where='hdfs')
            except Exception as e:
                return JsonResponse({'status': False, 'msg': 'failed to add'})
            return JsonResponse({'status': True, 'msg': 'added successfully'})
        except Exception as e:
            print(e)
            return JsonResponse({'status': False, 'msg': 'failed to add'})
Example #5
def save_mongo_txt(file, author_id, isHeader, separator, filename):
    try:
        client = pymongo.MongoClient(MONGO_DB_URI)
        db = client.datahoop.data
        with open(file, 'rb') as f:  # detect the file encoding
            data_type = chardet.detect(f.readline())['encoding']
        with open(file, 'r', encoding=data_type, errors='ignore') as f1:
            data = pd.read_csv(f1, delimiter=separator, dtype=str)
        print(type(data))
        fileName = filename.replace((filename.split('.')[-1]),
                                    (filename.split('.')[-1]).lower()).replace(
                                        (filename.split('.')[-1]),
                                        (filename.split('.')[-1]).lower())
        if isHeader == 1:
            Data = [list(data.columns)] + data.values.tolist()
        else:
            all = data.shape[1]
            len_lines = []
            for i in range(all):
                len_lines.append('_C' + str(i))
            Data = [list(len_lines)] + [list(data.columns)
                                        ] + data.values.tolist()
        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': Data
        }
        object_id = db.insert(jsonData)
        object_id = string_type(object_id)
        client.close()
        return object_id
    except Exception as e:
        return 'none'
Example #6
def save_data_mongodb(datas):
    """
    save datas to mongodb
    :param datas:
    :return:
    """
    try:
        client = pymongo.MongoClient(settings.MONGO_DB_URI)
        db = client.aduspider.data
        jsonData = {'MapData': datas}
        object_id = db.insert(jsonData)
        object_id = string_type(object_id)
        client.close()
        return object_id
    except Exception as e:
        logger.info('Mongodb connection failed for the following reasons: {}'.format(e))
        return 'error'
Example #7
def thirdry(author_id):
    client = pymongo.MongoClient(MONGO_DB_URI)
    db = client.netease_music.song
    a = []
    for i in db.find().limit(1):
        a.append(list(i)[1:])
    for i in (db.find({}).limit(1000)):
        s = []
        for item in list(i)[1:]:
            s.append(i[item])
        a.append(s)
    jsonData = {'fileName': 'netease', 'userID': author_id, 'fileData': a}

    object_id = db.insert(jsonData)
    object_id = string_type(object_id)

    client.close()
    return object_id
Example #8
def save_mongo_sql(file, author_id):
    import subprocess
    # sql = 'mysql --defaults-extra-file=/etc/mysql/fabric.cfg testmysql < %s' % file
    sql = 'mysql  testsql < %s' % file
    subprocess.call(sql, shell=True)
    content = open(file).read()
    table_name = (re.findall("DROP TABLE IF EXISTS `(.+)`", content))[0]
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    con = pymysql.connect('172.17.0.100', 'root', 'root', 'testsql')
    with con:
        # as before, first get a cursor object from the connection to run queries
        cur = con.cursor()
        sql = "select DISTINCT (COLUMN_NAME) from information_schema.COLUMNS where table_name = '%s'"
        cur.execute(sql % (table_name))
        rows = cur.fetchall()
        rels = []
        rel = []
        for i in rows:
            rel.append(i[0])
        rels.append(rel)
        # execute runs the query, like the query function in other languages
        cur.execute("SELECT * FROM  %s" % (table_name))
        # fetchall stores the result set (a tuple of tuples) in rows
        rows = cur.fetchall()
        # iterate over the result set; each element is one table record, shown as a tuple
        for row in rows:
            rels.append(list(row))
        jsonData = {
            'fileName': table_name + '.sql',
            'userID': author_id,
            'fileData': rels
        }
        object_id = db.insert(jsonData)
        object_id = string_type(object_id)
        client.close()
        cur.close()
        os.remove(file)  # delete the locally saved file; no local copy is kept
        return object_id
Example #9
def save_mongo_sql(file, author_id):
    client = pymongo.MongoClient(MONGO_DB_URI)
    db = client.datahoop.data
    data_list = []
    with open(file, 'rb') as f:  # detect the file encoding
        data_type = chardet.detect(f.readline())['encoding']
    with open(file, 'r', encoding=data_type, errors='ignore') as f1:
        for i in f1.readlines():
            data_list.append(
                re.findall(r'[^()]+', i.replace("'", ''))[1].split(','))
    data_list_table = []
    for i in range(len(data_list[0])):
        data_list_table.append('_C' + str(i))
    data_list.insert(0, data_list_table)
    jsonData = {
        'fileName': file.rsplit('\\', 1)[-1],
        'userID': author_id,
        'fileData': data_list
    }
    object_id = db.insert(jsonData)
    object_id = string_type(object_id)
    client.close()
    return object_id
Example #10
def LogisticR_spark(filepath,feature_columns,label_columns,maxiter, regparam, elasticnetparam):
    '''
    :param filepath: file paths (training file path, test file path)
    :param feature_columns: column indices of the feature variables
    :param label_columns: column index of the label variable
    :param maxiter: number of iterations
    :param regparam: regularization parameter (>= 0)
    :param elasticnetparam: ElasticNet mixing parameter in [0, 1]; 0 means an L2 penalty, 1 means L1
    :return: dict; the model display result is stored in MongoDB under an object_id, and the predictions and fitted model are saved to HDFS paths
    '''
    msg = {'status': True, 'error': None,  'data': None}
    try:
        NAME = "LogisticRegression"
        CLASS = "Classify"
        spark = SparkSession.builder.appName("myModel").getOrCreate()
        train_filepath = filepath[0]
        test_filepath = filepath[1]
        train_df = spark.read.csv(train_filepath,inferSchema=True)
        test_df = spark.read.csv(test_filepath, inferSchema=True)
        feature_colname = [train_df.columns[ii] for ii in feature_columns]
        featuresCreator = VectorAssembler(inputCols=feature_colname, outputCol="features")
        train_df.show()
        logr = LogisticRegression(maxIter=maxiter, regParam=regparam,
                                  elasticNetParam=elasticnetparam,labelCol=train_df.columns[label_columns],featuresCol="features")
        # build a pipeline
        from pyspark.ml import Pipeline
        pipeline = Pipeline(stages=[featuresCreator, logr])
        model = pipeline.fit(train_df)
        test_model = model.transform(test_df)
        uuid_name1 = str(uuid.uuid1())
        filepath_result = os.path.join(filepath_result_DIR, uuid_name1)
        test_model.show()

        print(type(test_model),filepath_result)
        test_model.write.save(filepath_result)
        # uuid_name = str(uuid.uuid1())
        # file_result_DIR = "hdfs://master:9000/datahoop/filepath_result/"
        # filepath_result1 = os.path.join(file_result_DIR, uuid_name)
        # df11 = spark.read.parquet(filepath_result)
        # df11.write.csv(filepath_result1)
        # df11.show()
        Test_Model = test_model.toPandas()[0:21]
        Test_Model_title = list(Test_Model.columns)
        Test_Model_Result = [Test_Model_title] + Test_Model.values.tolist()
        #print(type(str(Test_Model_Result[1][8])),str(Test_Model_Result),"qqqqqqqqqqqqqqq")
        # evaluate the model's performance
        evaluator = BinaryClassificationEvaluator(rawPredictionCol="probability",labelCol=train_df.columns[label_columns])
        # the test predictions and the model are saved to HDFS as paths
        #from pyspark.ml import PipelineModel
        uuid_name2 = str(uuid.uuid1())
        filepath_model = os.path.join(filepath_model_DIR, uuid_name2)
        model.write().overwrite().save(filepath_model)
        output = {}
        output["function_name"] = NAME
        output["function_class"] = CLASS
        output["Test_Model_Result"] = str(Test_Model_Result)
        output["areaUnderROC"] = float(evaluator.evaluate(test_model,{evaluator.metricName: "areaUnderROC"}))
        output["areaUnderPR"] = float(evaluator.evaluate(test_model,{evaluator.metricName: "areaUnderPR"}))
        import pymongo
        from bson.objectid import string_type
        #from settings import mongodbUri
        client = pymongo.MongoClient(mongodbUri)
        db = client.mark.algo_collection
        jsonData = {
            'fileName': NAME,
            'userID': 2,
            'fileData': output
        }
        OBJ = db.insert(jsonData)
        OBJ_ID = string_type(OBJ)
        client.close()
        out_result = {}
        out_result["OBJ_ID"] = OBJ_ID
        out_result["file_name"] = [uuid_name1,uuid_name2]
        out_result["filepath"] = [filepath_result,filepath_model]
        msg["data"] = out_result
    except Exception as e:
        msg["status"] = False
        msg["error"] = '执行失败:%s' % e
    msg = json.dumps(msg)
    return msg
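Given the parameters documented in the docstring of `LogisticR_spark`, a hedged call sketch (the HDFS paths and column indices below are hypothetical, and the function returns a JSON string, so the result is decoded with `json.loads`):

import json

result = json.loads(LogisticR_spark(
    ["hdfs://master:9000/datahoop/train.csv",   # hypothetical training CSV
     "hdfs://master:9000/datahoop/test.csv"],   # hypothetical test CSV
    feature_columns=[1, 2, 3, 4],               # indices of the feature columns
    label_columns=0,                            # index of the label column
    maxiter=10,                                 # number of iterations
    regparam=0.01,                              # regularization strength (>= 0)
    elasticnetparam=0.0))                       # 0 = pure L2 penalty, 1 = pure L1
if result["status"]:
    print(result["data"]["OBJ_ID"], result["data"]["filepath"])
else:
    print(result["error"])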
Example #11
def save_mongo_csv(file, author_id, isHeader, separator, filename):
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    try:
        fr = open(file, mode='r', encoding='gbk')
        dlm = separator
        csv_reader = csv.reader(fr, delimiter=dlm)
        data = list(csv_reader)
        print(data)
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower()).replace(
            (filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        if isHeader == "True":
            for line in data:
                fileData.append(line)
        else:
            all_len_lines = []

            for line in data:
                all_len_lines.append(len(line))
                fileData.append(line)
            len_lines = []
            for i in range(max(all_len_lines)):
                len_lines.append('A' + str(i + 1))
            fileData.insert(0, len_lines)
            print(fileData)
            print('ooo')

        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

        object_id = db.insert(jsonData)
        object_id = string_type(object_id)
    except Exception as e:
        fr = open(file, mode='r')
        dlm = separator
        csv_reader = csv.reader(fr, delimiter=dlm)
        data = list(csv_reader)
        print(data)
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        if isHeader == "True":
            for line in data:
                fileData.append(line)
        else:
            all_len_lines = []

            for line in data:
                all_len_lines.append(len(line))
                fileData.append(line)
            len_lines = []
            for i in range(max(all_len_lines)):
                len_lines.append('A' + str(i + 1))
            fileData.insert(0, len_lines)
            print(fileData)
            print('ooo')
        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

        object_id = db.insert(jsonData)
        object_id = string_type(object_id)

    client.close()
    os.remove(file)  # delete the locally saved file; no local copy is kept
    return object_id
Example #12
def save_mongo_txt(file, author_id, isHeader, separator, filename):
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    print(isHeader, 'pppppppppppppppppppppp')
    try:
        data = open(file, encoding='utf-8')
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        if isHeader == "True":
            for line in data:
                fileData.append(line.replace('\n', '').split(separator))
        else:
            all_len_lines = []

            for line in data:
                all_len_lines.append(len(line.replace('\n', '').split(separator)))
                fileData.append(line.replace('\n', '').split(separator))
            len_lines = []
            for i in range(max(all_len_lines)):
                len_lines.append('A' + str(i + 1))
            fileData.insert(0, len_lines)
        print(fileData)
        print('ppp')
        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

    except Exception as e:
        data = open(file, encoding='gb18030', errors='ignore')
        fileName = filename.replace((filename.split('.')[-1]), (filename.split('.')[-1]).lower())
        fileData = []
        if isHeader == "True":
            for line in data:
                fileData.append(line.replace('\n', '').split(separator))
            print(fileData)
            print('ooo')
            print('dddddddddddddddddddddddddddddddddd')
        else:
            all_len_lines = []

            for line in data:
                all_len_lines.append(len(line.replace('\n', '').split(separator)))
                fileData.append(line.replace('\n', '').split(separator))
            len_lines = []
            for i in range(max(all_len_lines)):
                len_lines.append('A' + str(i + 1))
            fileData.insert(0, len_lines)
            print(fileData)
            print('ooo')

        jsonData = {
            'fileName': fileName,
            'userID': author_id,
            'fileData': fileData
        }

    object_id = db.insert(jsonData)
    object_id = string_type(object_id)
    client.close()
    os.remove(file)  # delete the locally saved file; no local copy is kept
    return object_id
Example #13
def modelJson(request):
    """
        Fetch and save a model<br>
        MJson: model result<br>
        MId: model ID<br>
        modelName: model name<br>
        remark: model note<br>
        labelList: list of label IDs<br>
    """

    msg = {'status': True, 'data': None}
    _phone = request.user.id
    if request.method == "GET":
        _id = request.GET.get('modelid', '')
        _modelName = request.GET.get("modelName")
        try:

            obj = ModelResult.objects.get(id=_id)
            mongoCli = cli.mark.models
            msg["mjson"] = mongoCli.find_one({"_id":
                                              ObjectId(obj.OBJID)})["models"]
        except Exception as e:
            msg["status"] = False
            msg["error"] = "获取失败"
            logger.error('获取模型失败:{0}'.format(e))
        logger.info('获取模型列表:{0}'.format(msg))
        return JsonResponse(msg)

    elif request.method == "POST":
        _mJson = request.POST.get("MJson")
        logger.info("模型长度:{}".format(len(_mJson)))
        _mid = request.POST.get("MId", "")
        _remark = request.POST.get("remark", "")
        _modelName = request.POST.get("modelName")
        _labelList = json.loads(request.POST.get("labelList", "[1, 2, 3]"))  # default must be a JSON string
        print(type(_labelList), _labelList)
        _author = request.user  # get the user object
        logger.debug("Model save request parameters: {0}--{1}--{2}".format(_modelName, _labelList, _remark))

        try:
            if not _mid:
                '''no MId value, so decide: update or create'''
                if ModelResult.objects.filter(ModelName=_modelName).count():
                    '''no mid, but the modelName already exists'''
                    msg['status'] = False
                    msg['error'] = 'Save failed: a model with this name already exists, try a different name!'
                    logger.info(
                        'Model name already exists, cannot save -- user: {0}; model ID: {1}, model name: {2}'.format(
                            _phone, _mid, _modelName))
                else:
                    '''create a new model record'''
                    mongoCli = cli.mark.models
                    OBJ = mongoCli.insert({"models": _mJson})
                    OBJ_ID = string_type(OBJ)
                    cli.close()
                    objID, status = ModelResult.objects.get_or_create(
                        user=_author,
                        ModelName=_modelName,
                        OBJID=OBJ_ID,
                        remark=_remark,
                    )
                    # save the labels
                    for i in _labelList:
                        labelObj = Model_Label.objects.get(id=i)
                        objID.label.add(labelObj)
                    objID.save()
                    logger.info("objID:{0}--状态:{1}".format(objID, status))
                    msg['modelid'] = objID.id
                    logger.info(
                        '保存成功--用户:{0} ; 模型ID:{1}, 模型名称:{2},数据:{3}'.format(
                            _phone, _mid, _modelName, OBJ_ID))

            else:
                '''update an existing model'''
                if ModelResult.objects.filter(ModelName=_modelName).exclude(
                        id=_mid).count():
                    '''keep modelName unique'''
                    msg['status'] = False
                    msg['error'] = 'Save failed: a model with this name already exists, try a different name!'
                    logger.info(
                        'Model name already exists, cannot save -- user: {0}; model ID: {1}, model name: {2}'.format(
                            _phone, _mid, _modelName))

                else:
                    obj = ModelResult.objects.get(id=_mid)
                    obj.label.add(1)  # add() takes objects or primary keys, not an id= keyword
                    ModelResult.objects.filter(id=_mid).update(
                        ModelName=_modelName)
                    mongoCli = cli.mark.models
                    mongoCli.update({'_id': ObjectId(obj.OBJID)},
                                    {"models": _mJson})
                    cli.close()
                    msg['modelid'] = _mid
                    logger.info(
                        'Updated successfully -- user: {0}; model ID: {1}, model name: {2}, data: {3}'.format(
                            _phone, _mid, _modelName, obj.OBJID))

        except Exception as e:
            logger.error('Failed to save the model: {0}'.format(e))
            msg['status'] = False
            msg['error'] = 'Save failed!'
        return JsonResponse(msg)
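The `modelJson` view expects the GET/POST parameters listed in its docstring. A hedged client-side sketch (the URL route and the authentication scheme are not shown in the example, so the path and the pre-authenticated session below are hypothetical):

import requests

session = requests.Session()                 # assumed to already carry a logged-in session cookie

# create a new model record (an empty MId means "create")
payload = {
    "MJson": '{"nodes": [], "edges": []}',   # serialized model result
    "MId": "",
    "modelName": "demo-model",               # must not collide with an existing name
    "remark": "first draft",
    "labelList": "[1, 2]",                   # JSON-encoded list of Model_Label ids
}
resp = session.post("http://localhost:8000/model/json/", data=payload)  # hypothetical route
created = resp.json()                        # {"status": true, "modelid": ...} on success

# fetch the stored model back by its database id
resp = session.get("http://localhost:8000/model/json/",
                   params={"modelid": created.get("modelid")})
print(resp.json().get("mjson"))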