def save_mongo_py(file, author_id, filename):
    """Store a local source file's lines in MongoDB and delete the local copy.

    The file is read as UTF-8 first; on a decode error it is re-read as
    GBK (uploads may come from Chinese Windows machines).

    :param file: path of the local file to ingest (removed on success)
    :param author_id: owning user's id, stored as ``userID``
    :param filename: original file name; its extension is lower-cased
    :return: the inserted document's ObjectId rendered as a string
    """
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    # NOTE(review): ``client.mae`` is a Database object while the sibling
    # functions insert into a Collection (e.g. ``client.datahoop.data``) —
    # confirm the intended insert target.
    db = client.mae
    try:
        ext = filename.split('.')[-1]
        file_name = filename.replace(ext, ext.lower())
        try:
            with open(file, encoding='utf-8') as fh:
                file_data = list(fh)
        except UnicodeDecodeError:
            # Fall back only on decode failures; other errors propagate.
            with open(file, encoding='gbk') as fh:
                file_data = list(fh)
        json_data = {
            'fileName': file_name,
            'userID': author_id,
            'fileData': file_data
        }
        object_id = string_type(db.insert(json_data))
    finally:
        client.close()
    os.remove(file)  # the local copy is not kept
    return object_id
def saveCodeToMongodb(self, code, configuration):
    """Persist an algorithm's source code and configuration to MongoDB.

    Inserts one document into the ``mark.algo`` collection and returns
    the new document's ObjectId rendered as a string.
    """
    collection = cli.mark.algo
    document = {"code": code, "configuration": configuration}
    inserted = collection.insert(document)
    cli.close()
    return string_type(inserted)
def saveModelToMongodb(self, model):
    """Persist a trained model payload to MongoDB.

    Inserts one document into the ``mark.models`` collection and returns
    the new document's ObjectId rendered as a string.
    """
    collection = cli.mark.models
    inserted = collection.insert({"model": model})
    cli.close()
    return string_type(inserted)
def get(self, request):
    """Copy the data sources listed in ``file_id`` to the current user.

    ``file_id`` arrives as the text of a Python/JSON-style list of
    DataSource ids (e.g. ``"[1, 2]"``).  Mongo-backed sources get their
    document duplicated under the requesting user; HDFS-backed sources
    only get a new DataSource row pointing at the same file.

    :return: JsonResponse with ``status``/``msg``
    """
    import ast

    mg_client = pymongo.MongoClient(MONGO_DB_URI)
    db = mg_client.datahoop.data
    user_id = request.user.id
    try:
        # literal_eval instead of eval: the value is user-controlled and
        # eval would execute arbitrary expressions.
        all_file_id = ast.literal_eval(request.GET.get('file_id'))
        try:
            for file_id in all_file_id:
                # Fetch the source row once instead of re-querying per field.
                source = DataSource.objects.get(id=file_id)
                file_name = source.file_name
                if source.where == 'mongodb':
                    data = db.find_one({'_id': ObjectId(source.obj_id)})['fileData']
                    json_data = {
                        'fileName': str(file_name),
                        'userID': user_id,
                        'fileData': data
                    }
                    new_obj_id = string_type(db.insert(json_data))
                    DataSource.objects.create(
                        user_id=user_id, file_name=str(file_name),
                        where='mongodb', obj_id=new_obj_id)
                else:
                    DataSource.objects.create(
                        user_id=user_id, file_name=str(file_name),
                        format_filename=source.format_filename, where='hdfs')
        except Exception:
            return JsonResponse({'status': False, 'msg': '添加失败'})
        finally:
            # Close once, after the whole batch — the original closed the
            # client inside the loop, breaking subsequent iterations.
            mg_client.close()
        return JsonResponse({'status': True, 'msg': '添加成功'})
    except Exception as e:
        mg_client.close()
        print(e)
        return JsonResponse({'status': False, 'msg': '添加失败'})
def save_mongo_txt(file, author_id, isHeader, separator, filename):
    """Parse a delimited text file with pandas and store its rows in MongoDB.

    NOTE(review): another ``save_mongo_txt`` defined later in this file
    shadows this one at import time — confirm which is intended.

    :param file: local path of the uploaded file
    :param author_id: owning user's id (stored as ``userID``)
    :param isHeader: 1 when the first row already holds column names;
        otherwise synthetic ``_C<i>`` headers are prepended
    :param separator: field delimiter passed to ``pd.read_csv``
    :param filename: original name; its extension is lower-cased
    :return: inserted ObjectId as a string, or the literal ``'none'``
        on any failure (best-effort contract kept from the original)
    """
    try:
        client = pymongo.MongoClient(MONGO_DB_URI)
        db = client.datahoop.data
        with open(file, 'rb') as raw:
            # Sniff the encoding from the first raw line before decoding.
            encoding = chardet.detect(raw.readline())['encoding']
        with open(file, 'r', encoding=encoding, errors='ignore') as fh:
            frame = pd.read_csv(fh, delimiter=separator, dtype=str)
        ext = filename.split('.')[-1]
        file_name = filename.replace(ext, ext.lower())
        if isHeader == 1:
            rows = [list(frame.columns)] + frame.values.tolist()
        else:
            # First row was data: keep it and prepend generated headers.
            generated = ['_C' + str(i) for i in range(frame.shape[1])]
            rows = [generated] + [list(frame.columns)] + frame.values.tolist()
        json_data = {
            'fileName': file_name,
            'userID': author_id,
            'fileData': rows
        }
        object_id = string_type(db.insert(json_data))
        client.close()
        return object_id
    except Exception:
        # Callers check for this literal sentinel.
        return 'none'
def save_data_mongodb(datas):
    """Insert *datas* into the ``aduspider.data`` collection.

    :param datas: payload stored under the ``MapData`` key
    :return: the new ObjectId as a string, or the literal ``'error'``
        when the connection/insert fails (the failure is logged)
    """
    try:
        client = pymongo.MongoClient(settings.MONGO_DB_URI)
        collection = client.aduspider.data
        document = {'MapData': datas}
        inserted = string_type(collection.insert(document))
        client.close()
    except Exception as e:
        logger.info('Mongodb connection failed for the following reasons: {}'.format(e))
        return 'error'
    return inserted
def thirdry(author_id):
    """Snapshot the ``netease_music.song`` collection into one document.

    The first document's field names (everything after ``_id``) act as a
    header row, followed by the field values of up to 1000 documents.
    The resulting table is inserted back into the same collection and
    its ObjectId is returned as a string.
    """
    client = pymongo.MongoClient(MONGO_DB_URI)
    songs = client.netease_music.song
    table = []
    # Header row: keys of the first document, skipping '_id'.
    for doc in songs.find().limit(1):
        table.append(list(doc)[1:])
    # Data rows: values in key order, again skipping '_id'.
    for doc in songs.find({}).limit(1000):
        keys = list(doc)[1:]
        table.append([doc[key] for key in keys])
    payload = {'fileName': 'netease', 'userID': author_id, 'fileData': table}
    new_id = string_type(songs.insert(payload))
    client.close()
    return new_id
def save_mongo_sql(file, author_id):
    """Import a MySQL dump into ``testsql``, then copy the resulting table
    (header row + all rows) into MongoDB and delete the local dump.

    NOTE(review): a later ``save_mongo_sql`` in this file shadows this
    definition at import time — confirm which one callers should get.

    :param file: path to a ``.sql`` dump containing a DROP TABLE line
    :param author_id: owning user's id (stored as ``userID``)
    :return: inserted ObjectId as a string
    """
    import subprocess
    # Feed the dump over stdin instead of a shell redirect so the file
    # name is never interpolated into a shell command line (injection-safe).
    with open(file) as dump:
        subprocess.call(['mysql', 'testsql'], stdin=dump)
        dump.seek(0)
        content = dump.read()
    table_name = re.findall("DROP TABLE IF EXISTS `(.+)`", content)[0]
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    con = pymysql.connect('172.17.0.100', 'root', 'root', 'testsql')
    with con:
        cur = con.cursor()
        # Header row: the table's column names (parameterized, not %-formatted).
        cur.execute(
            "select DISTINCT (COLUMN_NAME) from information_schema.COLUMNS "
            "where table_name = %s", (table_name,))
        header = [row[0] for row in cur.fetchall()]
        rels = [header]
        # Identifier cannot be bound as a parameter; table_name comes from
        # the dump itself, not from the request.
        cur.execute("SELECT * FROM %s" % table_name)
        for row in cur.fetchall():
            rels.append(list(row))
    json_data = {
        'fileName': table_name + '.sql',
        'userID': author_id,
        'fileData': rels
    }
    object_id = string_type(db.insert(json_data))
    client.close()
    cur.close()
    os.remove(file)  # the local copy is not kept
    return object_id
def save_mongo_sql(file, author_id):
    """Extract value tuples from a SQL dump file and store them in MongoDB.

    NOTE(review): this redefines ``save_mongo_sql`` from earlier in the
    file and shadows it at import time — confirm which is intended.

    Each line is split on parentheses and the second chunk (presumably
    the VALUES list — TODO confirm against real dump files) is split on
    commas; a synthetic ``_C<i>`` header row is prepended.

    :param file: local path to the ``.sql`` dump
    :param author_id: owning user's id (stored as ``userID``)
    :return: inserted ObjectId as a string
    """
    client = pymongo.MongoClient(MONGO_DB_URI)
    db = client.datahoop.data
    data_list = []
    with open(file, 'rb') as f:
        # Sniff the encoding from the first raw line.
        data_type = chardet.detect(f.readline())['encoding']
    with open(file, 'r', encoding=data_type, errors='ignore') as f1:
        for i in f1.readlines():
            # Strip single quotes, take the text between the first pair of
            # parentheses, and split it into individual values.
            data_list.append(
                re.findall(r'[^()]+', i.replace("'", ''))[1].split(','))
    # Generated column names sized to the first row's width.
    data_list_table = []
    for i in range(len(data_list[0])):
        data_list_table.append('_C' + str(i))
    data_list.insert(0, data_list_table)
    jsonData = {
        # Windows-style basename split — TODO confirm path separator.
        'fileName': file.rsplit('\\', 1)[-1],
        'userID': author_id,
        'fileData': data_list
    }
    object_id = db.insert(jsonData)
    object_id = string_type(object_id)
    client.close()
    return object_id
def LogisticR_spark(filepath, feature_columns, label_columns, maxiter, regparam, elasticnetparam):
    """Train a Spark logistic-regression pipeline and persist the results.

    :param filepath: pair of CSV paths — ``[train_path, test_path]``
    :param feature_columns: column indexes used as features
    :param label_columns: column index of the label
    :param maxiter: maximum training iterations
    :param regparam: regularization parameter (>= 0)
    :param elasticnetparam: ElasticNet mixing parameter in [0, 1]
        (0 → pure L2 penalty, 1 → pure L1)
    :return: JSON string ``{'status', 'error', 'data'}``; on success
        ``data`` holds the Mongo OBJ_ID of the summary document plus the
        save paths of the predictions and the fitted model
    """
    msg = {'status': True, 'error': None, 'data': None}
    try:
        NAME = "LogisticRegression"
        CLASS = "Classify"
        spark = SparkSession.builder.appName("myModel").getOrCreate()
        train_filepath = filepath[0]
        test_filepath = filepath[1]
        train_df = spark.read.csv(train_filepath, inferSchema=True)
        test_df = spark.read.csv(test_filepath, inferSchema=True)
        feature_colname = [train_df.columns[ii] for ii in feature_columns]
        featuresCreator = VectorAssembler(inputCols=feature_colname, outputCol="features")
        train_df.show()
        logr = LogisticRegression(maxIter=maxiter, regParam=regparam,
                                  elasticNetParam=elasticnetparam,
                                  labelCol=train_df.columns[label_columns],
                                  featuresCol="features")
        # Build a two-stage pipeline: feature assembly then the classifier.
        from pyspark.ml import Pipeline
        pipeline = Pipeline(stages=[featuresCreator, logr])
        model = pipeline.fit(train_df)
        test_model = model.transform(test_df)
        # Predictions are saved under a fresh UUID-named path.
        uuid_name1 = str(uuid.uuid1())
        filepath_result = os.path.join(filepath_result_DIR, uuid_name1)
        test_model.show()
        print(type(test_model), filepath_result)
        test_model.write.save(filepath_result)
        # uuid_name = str(uuid.uuid1())
        # file_result_DIR = "hdfs://master:9000/datahoop/filepath_result/"
        # filepath_result1 = os.path.join(file_result_DIR, uuid_name)
        # df11 = spark.read.parquet(filepath_result)
        # df11.write.csv(filepath_result1)
        # df11.show()
        # First 21 prediction rows kept as a preview table (header + rows).
        Test_Model = test_model.toPandas()[0:21]
        Test_Model_title = list(Test_Model.columns)
        Test_Model_Result = [Test_Model_title] + Test_Model.values.tolist()
        # Evaluate model performance (binary-classification metrics).
        evaluator = BinaryClassificationEvaluator(rawPredictionCol="probability",
                                                  labelCol=train_df.columns[label_columns])
        # Persist the fitted model to a second UUID-named path.
        # from pyspark.ml import PipelineModel
        uuid_name2 = str(uuid.uuid1())
        filepath_model = os.path.join(filepath_model_DIR, uuid_name2)
        model.write().overwrite().save(filepath_model)
        output = {}
        output["function_name"] = NAME
        output["function_class"] = CLASS
        output["Test_Model_Result"] = str(Test_Model_Result)
        output["areaUnderROC"] = float(evaluator.evaluate(test_model, {evaluator.metricName: "areaUnderROC"}))
        output["areaUnderPR"] = float(evaluator.evaluate(test_model, {evaluator.metricName: "areaUnderPR"}))
        import pymongo
        from bson.objectid import string_type
        # from settings import mongodbUri
        client = pymongo.MongoClient(mongodbUri)
        db = client.mark.algo_collection
        jsonData = {
            'fileName': NAME,
            'userID': 2,  # NOTE(review): hard-coded user id — confirm intent
            'fileData': output
        }
        OBJ = db.insert(jsonData)
        OBJ_ID = string_type(OBJ)
        client.close()
        out_result = {}
        out_result["OBJ_ID"] = OBJ_ID
        out_result["file_name"] = [uuid_name1, uuid_name2]
        out_result["filepath"] = [filepath_result, filepath_model]
        msg["data"] = out_result
    except Exception as e:
        msg["status"] = False
        msg["error"] = '执行失败:%s' % e
    msg = json.dumps(msg)
    return msg
def save_mongo_csv(file, author_id, isHeader, separator, filename):
    """Read a local CSV file, store its rows in MongoDB, delete the file.

    The file is parsed as GBK first; on any failure it is retried with
    the platform default encoding (best-effort behavior kept from the
    original try/except structure).

    :param file: local CSV path (removed on success)
    :param author_id: owning user's id (stored as ``userID``)
    :param isHeader: the string ``"True"`` when row 0 is a header;
        otherwise synthetic ``A1..An`` column names are prepended
    :param separator: CSV delimiter
    :param filename: original name; its extension is lower-cased
    :return: inserted ObjectId as a string
    """
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    try:
        object_id = _csv_rows_to_mongo(db, file, author_id, isHeader,
                                       separator, filename, 'gbk')
    except Exception:
        # Retry with the platform default encoding.
        object_id = _csv_rows_to_mongo(db, file, author_id, isHeader,
                                       separator, filename, None)
    client.close()
    os.remove(file)  # the local copy is not kept
    return object_id


def _csv_rows_to_mongo(db, file, author_id, isHeader, separator, filename, encoding):
    """Parse *file* as CSV with *encoding* and insert one document into *db*."""
    with open(file, mode='r', encoding=encoding) as fr:
        rows = list(csv.reader(fr, delimiter=separator))
    ext = filename.split('.')[-1]
    file_name = filename.replace(ext, ext.lower())
    file_data = list(rows)
    if isHeader != "True":
        # Prepend generated column names sized to the widest row.
        width = max((len(row) for row in rows), default=0)
        file_data.insert(0, ['A' + str(i + 1) for i in range(width)])
    json_data = {
        'fileName': file_name,
        'userID': author_id,
        'fileData': file_data
    }
    return string_type(db.insert(json_data))
def save_mongo_txt(file, author_id, isHeader, separator, filename):
    """Split a delimited text file into rows, store them in MongoDB,
    then delete the local file.

    NOTE(review): this redefines an earlier ``save_mongo_txt`` in this
    file and shadows it at import time — confirm which is intended.

    The file is read as UTF-8 first; on any failure it is retried as
    GB18030 with undecodable bytes ignored (behavior kept from the
    original try/except structure).

    :param file: local path (removed on success)
    :param author_id: owning user's id (stored as ``userID``)
    :param isHeader: the string ``"True"`` when row 0 is a header;
        otherwise synthetic ``A1..An`` column names are prepended
    :param separator: field delimiter
    :param filename: original name; its extension is lower-cased
    :return: inserted ObjectId as a string
    """
    client = pymongo.MongoClient(settings.MONGO_DB_URI)
    db = client.datahoop.data
    try:
        json_data = _txt_file_to_document(file, author_id, isHeader,
                                          separator, filename,
                                          'utf-8', 'strict')
    except Exception:
        json_data = _txt_file_to_document(file, author_id, isHeader,
                                          separator, filename,
                                          'gb18030', 'ignore')
    object_id = string_type(db.insert(json_data))
    client.close()
    os.remove(file)  # the local copy is not kept
    return object_id


def _txt_file_to_document(file, author_id, isHeader, separator, filename, encoding, errors):
    """Build the Mongo document for *file* decoded with *encoding*."""
    with open(file, encoding=encoding, errors=errors) as fh:
        rows = [line.replace('\n', '').split(separator) for line in fh]
    ext = filename.split('.')[-1]
    file_name = filename.replace(ext, ext.lower())
    if isHeader != "True":
        # Prepend generated column names sized to the widest row.
        width = max((len(row) for row in rows), default=0)
        rows.insert(0, ['A' + str(i + 1) for i in range(width)])
    return {
        'fileName': file_name,
        'userID': author_id,
        'fileData': rows
    }
def modelJson(request):
    """Fetch (GET) or create/update (POST) a model's JSON payload.

    The model body lives in the Mongo ``mark.models`` collection; the
    Django ``ModelResult`` row stores only its ObjectId plus metadata.

    MJson: model result JSON<br>
    MId: model id (empty → create)<br>
    modelName: model name (must be unique)<br>
    remark: model remark<br>
    labelList: label ids to attach<br>
    """
    msg = {'status': True, 'data': None}
    _phone = request.user.id
    if request.method == "GET":
        _id = request.GET.get('modelid', '')
        _modelName = request.GET.get("modelName")  # NOTE(review): unused in the GET branch
        try:
            obj = ModelResult.objects.get(id=_id)
            mongoCli = cli.mark.models
            # Resolve the stored ObjectId back to the Mongo document.
            msg["mjson"] = mongoCli.find_one({"_id": ObjectId(obj.OBJID)})["models"]
        except Exception as e:
            msg["status"] = False
            msg["error"] = "获取失败"
            logger.error('获取模型失败:{0}'.format(e))
        logger.info('获取模型列表:{0}'.format(msg))
        return JsonResponse(msg)
    elif request.method == "POST":
        _mJson = request.POST.get("MJson")
        logger.info("模型长度:{}".format(len(_mJson)))
        _mid = request.POST.get("MId", "")
        _remark = request.POST.get("remark", "")
        _modelName = request.POST.get("modelName")
        _labelList = json.loads(request.POST.get("labelList", [1, 2, 3]))
        print(type(_labelList), _labelList)
        _author = request.user  # the requesting user object
        logger.debug("模型保存请求参数:{0}--{1}--{2}".format(_modelName, _labelList, _remark))
        try:
            if not _mid:
                '''没有MID值 则 判断:更新还是新建'''
                # No MId: create — unless the name is already taken.
                if ModelResult.objects.filter(ModelName=_modelName).count():
                    '''没有mid 但是modelName已经存在'''
                    msg['status'] = False
                    msg['error'] = '保存失败:模型名已存在,换个名字试试吧!'
                    logger.info(
                        '模型名已存在无法保存--用户:{0} ; 模型ID:{1}, 模型名称:{2}'.format(
                            _phone, _mid, _modelName))
                else:
                    '''创建一条模型记录'''
                    # Body goes to Mongo; metadata row goes to Django.
                    mongoCli = cli.mark.models
                    OBJ = mongoCli.insert({"models": _mJson})
                    OBJ_ID = string_type(OBJ)
                    cli.close()
                    objID, status = ModelResult.objects.get_or_create(
                        user=_author,
                        ModelName=_modelName,
                        OBJID=OBJ_ID,
                        remark=_remark,
                    )
                    # Attach the requested labels to the new record.
                    for i in _labelList:
                        labelObj = Model_Label.objects.get(id=i)
                        objID.label.add(labelObj)
                    objID.save()
                    logger.info("objID:{0}--状态:{1}".format(objID, status))
                    msg['modelid'] = objID.id
                    logger.info(
                        '保存成功--用户:{0} ; 模型ID:{1}, 模型名称:{2},数据:{3}'.format(
                            _phone, _mid, _modelName, OBJ_ID))
            else:
                '''更新模型'''
                if ModelResult.objects.filter(ModelName=_modelName).exclude(
                        id=_mid).count():
                    '''modleName保持唯一'''
                    msg['status'] = False
                    msg['error'] = '保存失败:模型名已存在,换个名字试试吧!'
                    logger.info(
                        '模型名已存在无法保存--用户:{0} ; 模型ID:{1}, 模型名称:{2}'.format(
                            _phone, _mid, _modelName))
                else:
                    obj = ModelResult.objects.get(id=_mid)
                    # NOTE(review): m2m add() with an ``id=`` kwarg looks
                    # wrong (add takes objects/pks positionally) and would
                    # raise into the broad except below — verify intent.
                    obj.label.add(id=1)
                    ModelResult.objects.filter(id=_mid).update(
                        ModelName=_modelName)
                    mongoCli = cli.mark.models
                    mongoCli.update({'_id': ObjectId(obj.OBJID)}, {"models": _mJson})
                    cli.close()
                    msg['modelid'] = _mid
                    logger.info(
                        '更新成功--用户:{0} ; 模型ID:{1}, 模型名称:{2},数据:{3}'.format(
                            _phone, _mid, _modelName, obj.OBJID))
        except Exception as e:
            logger.error('模型保存失败:{0}'.format(e))
            msg['status'] = False
            msg['error'] = '保存失败!'
        return JsonResponse(msg)