Example #1
0
    def write_csv(self, data, name, **kwargs):
        """
        Build a DataFrame from ``data`` and write it to ``<path>/<name>.csv``.

        Any exception raised while building the frame or writing the file is
        caught and printed.

        :param data: source data, handed to ``pd.DataFrame.from_dict``
        :param name: CSV file name without the ``.csv`` extension
        :param kwargs: optional settings:
            ``path`` - target directory, defaults to DIR_dict.get('CSV_DIR');
            ``orient`` (default 'columns'), ``dtype``, ``columns`` - forwarded
            to ``from_dict``;
            ``index`` (default False), ``header`` (default True),
            ``encoding`` - forwarded to ``to_csv``
        :return: None
        """
        target_dir = kwargs.get('path', DIR_dict.get('CSV_DIR'))
        target = os.path.join(target_dir, name + '.csv')

        frame_options = {
            'orient': kwargs.get('orient', 'columns'),
            'dtype': kwargs.get('dtype'),
            'columns': kwargs.get('columns'),
        }
        csv_options = {
            'sep': ',',
            'index': kwargs.get('index', False),
            'header': kwargs.get('header', True),
            'encoding': kwargs.get('encoding'),
        }

        try:
            frame = pd.DataFrame.from_dict(data, **frame_options)
            frame.to_csv(target, **csv_options)
        except Exception as err:
            print(err)
Example #2
0
    def write_excel(self, data, name, **kwargs):
        """
        Build a DataFrame from ``data`` and write it to ``<path>/<name>.xlsx``.

        Any exception raised while building the frame or writing the workbook
        is caught and printed.

        :param data: source data, handed to ``pd.DataFrame.from_dict``
        :param name: Excel file name without the ``.xlsx`` extension
        :param kwargs: optional settings:
            ``path`` - target directory, defaults to DIR_dict.get('EXCEL_DIR');
            ``orient`` (default 'columns'), ``dtype``, ``columns`` - forwarded
            to ``from_dict``;
            ``sheet_name`` (default "Sheet1"), ``index`` (default False),
            ``header`` (default True) - forwarded to ``to_excel``;
            ``encoding`` - forwarded to ``to_excel`` only when supplied
        :return: None
        """

        path = kwargs.get('path', DIR_dict.get('EXCEL_DIR'))
        name = name + '.xlsx'
        file = os.path.join(path, name)

        excel_options = {
            'sheet_name': kwargs.get('sheet_name', "Sheet1"),
            'index': kwargs.get('index', False),
            'header': kwargs.get('header', True),
        }
        # ``to_excel`` lost its ``encoding`` argument in pandas 2.0, so it is
        # forwarded only when the caller explicitly asks for it instead of
        # always passing ``encoding=None``.
        encoding = kwargs.get('encoding')
        if encoding is not None:
            excel_options['encoding'] = encoding

        try:
            df = pd.DataFrame.from_dict(data,
                                        orient=kwargs.get('orient', 'columns'),
                                        dtype=kwargs.get('dtype'),
                                        columns=kwargs.get('columns')
                                        )
            # The context manager saves and closes the workbook, also when
            # ``to_excel`` raises; ``ExcelWriter.save()`` was removed in
            # pandas 2.0.
            with pd.ExcelWriter(file) as writer:
                df.to_excel(writer, **excel_options)
        except Exception as e:
            print(e)
Example #3
0
 def save(self, info, **kwargs):
     """
     Pickle ``info`` to ``<save_path>/<save_name>.pickle``.

     ``.pickle`` is always appended to ``save_name``, so the default
     ``save_name`` of "untitle.pickle" produces "untitle.pickle.pickle".
     Any exception raised while building the path or writing is caught and
     printed.

     :param info: object to serialize with ``pickle.dump``
     :param kwargs: ``save_name`` - file name stem, defaults to
         "untitle.pickle"; ``save_path`` - target directory, defaults to
         DIR_dict.get("PICKLE_DIR")
     :return: None
     """
     save_name = kwargs.get("save_name", "untitle.pickle")
     path = kwargs.get("save_path", DIR_dict.get("PICKLE_DIR"))
     try:
         # os.path.join uses the separator of the running platform; the
         # previously hard-coded backslash only formed a valid path on Windows.
         target = os.path.join(path, '{0}.pickle'.format(save_name))
         with open(target, 'wb') as file:
             pickle.dump(info, file)
     except Exception as e:
         print(e)
Example #4
0
    def image_segmentation(self,
                           name="starbucks.jpg",
                           save_name="result-bull-starbucks.jpg"):
        """
        Cluster the pixels of a picture into 4 groups and save the result as a
        grayscale JPEG.

        The picture is read from and the result written to
        DIR_dict.get("PICTURE_DIR").

        :param name: file name of the input picture
        :param save_name: file name of the output picture
        :return: None
        """
        picture_dir = DIR_dict.get("PICTURE_DIR")

        # Load the pixel data together with the two image dimensions.
        path = os.path.join(picture_dir, name)
        imgData, row, col = self.import_private_data(image_segmentation=True,
                                                     path=path)

        # Build the clustering model and assign every pixel to a cluster.
        model = self.create_model(X_train=imgData, KMeans=True, n_clusters=4)
        label = model.fit_predict(imgData)

        # Render each cluster label as a gray level.
        label = label.reshape([row, col])
        pic_new = image.new("L", (row, col))
        for i in range(row):
            for j in range(col):
                pic_new.putpixel((i, j), int(256 / (label[i][j] + 1)))

        # "JPEG" is the format argument of save(); it used to be passed to
        # os.path.join, which turned the target into "<dir>/<save_name>/JPEG".
        pic_new.save(os.path.join(picture_dir, save_name), "JPEG")
Example #5
0
    def ImportDatabase(self, db, host, **kwargs):
        """
        Import every ``*.json`` file of a directory as a collection.

        Each file ``<path>/<collection>.json`` is passed to
        ``self.ImportCollection``. Unless ``save_dir`` is truthy the directory
        is removed afterwards via ``self.common.rmdir``.

        :param db: database name; also the sub-directory of
            DIR_dict.get('JSON_DIR') used when ``path`` is not given
        :param host: MongoDB host, forwarded to ``ImportCollection``
        :param kwargs: ``path`` - directory holding the JSON files;
            ``save_dir`` - keep the directory after the import when truthy
        :return: None
        """
        path = kwargs.get('path', os.path.join(DIR_dict.get('JSON_DIR'), db))

        # Only *.json entries are collections; anything else in the directory
        # is skipped instead of being handed to the import.
        suffix = '.json'
        cols = [
            entry[:-len(suffix)] for entry in os.listdir(path)
            if entry.endswith(suffix)
        ]
        print("待导入集合:{0}\n".format(cols))
        for i, col in enumerate(cols):
            # Pass the listed directory explicitly: this honours a custom
            # ``path`` and avoids building "<db>\\<col>" with a Windows-only
            # separator.
            self.ImportCollection(col, col, host, path=path)
            print("{0}/{1}:集合{2}导入完毕!".format(i + 1, len(cols), col))
        print("全部集合导入完毕!")

        if not kwargs.get('save_dir'):
            self.common.rmdir(path)
Example #6
0
 def read_png(self, name, **kwargs):
     """
     Open ``<path>/<name>.png`` and display it with PIL's ``Image.show()``.

     Any exception raised while opening or showing the image is caught and
     printed.

     :param name: PNG file name without the ``.png`` extension
     :param kwargs: ``path`` - directory, defaults to DIR_dict.get('PNG_DIR')
     :return: None
     """
     from PIL import Image

     png_dir = kwargs.get('path', DIR_dict.get('PNG_DIR'))
     png_file = os.path.join(png_dir, name + '.png')
     try:
         Image.open(png_file).show()
     except Exception as err:
         print(err)
Example #7
0
 def selenium(self):
     """
     Start a headless Chrome session using the ``chromedriver.exe`` found in
     DIR_dict.get('EXE_DIR').

     :return: the ``webdriver.Chrome`` instance, returned after the window
         was maximized and a 5 second pause
     """
     driver_path = os.path.join(DIR_dict.get('EXE_DIR'), 'chromedriver.exe')

     # Run Chromedriver in the background.
     options = webdriver.ChromeOptions()
     for flag in ('--headless', '--no-sandbox'):
         options.add_argument(flag)

     driver = webdriver.Chrome(executable_path=driver_path,
                               chrome_options=options)

     # Show the window full screen.
     driver.maximize_window()
     time.sleep(5)
     return driver
Example #8
0
    def write_png(self, name, **kwargs):
        """
        Save the figure held in ``self.fig`` as ``<path>/<name>.png``.

        The figure is written with ``dpi=500`` and ``bbox_inches='tight'``.
        Any exception raised by ``savefig`` is caught and printed.

        :param name: PNG file name without the ``.png`` extension
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('PNG_DIR')
        :return: None
        """
        out_dir = kwargs.get('path', DIR_dict.get('PNG_DIR'))
        out_file = os.path.join(out_dir, name + '.png')
        save_options = {'dpi': 500, 'bbox_inches': 'tight'}

        try:
            self.fig.savefig(out_file, **save_options)
        except Exception as err:
            print(err)
Example #9
0
    def read_txt(self, name, **kwargs):
        """
        Read ``<path>/<name>.txt`` and return its lines.

        The file is opened in text mode with the platform default encoding.
        Any exception is caught and printed, in which case None is returned.

        :param name: text file name without the ``.txt`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('TXT_DIR')
        :return: list of lines (line endings included), or None on failure
        """
        txt_dir = kwargs.get('path', DIR_dict.get('TXT_DIR'))
        txt_file = os.path.join(txt_dir, name + '.txt')

        try:
            with open(txt_file, 'r') as handle:
                lines = handle.readlines()
        except Exception as err:
            print(err)
        else:
            return lines
Example #10
0
    def write_pickle(self, data, name, **kwargs):
        """
        Serialize ``data`` with pickle into ``<path>/<name>.pickle``.

        Any exception raised while opening or writing is caught and printed.

        :param data: any picklable object
        :param name: file name without the ``.pickle`` extension
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('PICKLE_DIR')
        :return: None
        """
        pickle_dir = kwargs.get('path', DIR_dict.get('PICKLE_DIR'))
        target = os.path.join(pickle_dir, name + '.pickle')

        try:
            with open(target, 'wb') as sink:
                pickle.dump(data, sink)
        except Exception as err:
            print(err)
Example #11
0
    def read_pickle(self, name, **kwargs):
        """
        Load the object stored in ``<path>/<name>.pickle``.

        Any exception is caught and printed, in which case None is returned.

        :param name: file name without the ``.pickle`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('PICKLE_DIR')
        :return: the unpickled object, or None on failure
        """
        pickle_dir = kwargs.get('path', DIR_dict.get('PICKLE_DIR'))
        source = os.path.join(pickle_dir, name + '.pickle')

        try:
            with open(source, 'rb') as stream:
                loaded = pickle.load(stream)
        except Exception as err:
            print(err)
        else:
            return loaded
Example #12
0
    def write_json(self, data, name, **kwargs):
        """
        Dump ``data`` as UTF-8 JSON into ``<path>/<name>.json``.

        Non-ASCII characters are written as-is (``ensure_ascii=False``). Any
        exception raised while opening or writing is caught and printed.

        :param data: JSON-serializable data
        :param name: file name without the ``.json`` extension
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('JSON_DIR')
        :return: None
        """
        json_dir = kwargs.get('path', DIR_dict.get('JSON_DIR'))
        target = os.path.join(json_dir, name + '.json')

        try:
            with open(target, 'w', encoding='utf-8') as sink:
                json.dump(data, sink, ensure_ascii=False)
        except Exception as err:
            print(err)
Example #13
0
    def read_csv(self, name, **kwargs):
        """
        Read ``<path>/<name>.csv`` with ``pd.read_csv``.

        Any exception is caught and printed, in which case None is returned.

        :param name: CSV file name without the ``.csv`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('CSV_DIR')
        :return: the DataFrame produced by ``pd.read_csv``, or None on failure
        """
        csv_dir = kwargs.get('path', DIR_dict.get('CSV_DIR'))
        csv_file = os.path.join(csv_dir, name + '.csv')

        try:
            return pd.read_csv(csv_file)
        except Exception as err:
            print(err)
Example #14
0
    def Backup(self, db, host, **kwargs):
        """
        Dump database ``db`` on ``host`` into ``<path>/mongo`` with
        ``mongodump``.

        The tool is started without a shell, one list element per argument, so
        credentials or paths containing spaces or shell metacharacters are
        passed through unchanged. If the tool cannot be started or exits with
        a non-zero code, a warning is logged and the completion message is not
        printed.

        :param db: name of the database to dump
        :param host: MongoDB host, passed to ``-h``
        :param kwargs: ``path`` - backup root, defaults to
            DIR_dict.get('BACKUP_DIR'); ``USER`` / ``PASSWORD`` - credentials,
            default to ``self.user`` / ``self.passwd``
        :return: None
        """
        import subprocess

        path = kwargs.get('path', DIR_dict.get('BACKUP_DIR'))
        file = os.path.join(path, 'mongo')

        USER = kwargs.get('USER', self.user)
        PASSWORD = kwargs.get('PASSWORD', self.passwd)

        command = [
            'mongodump',
            '-h', str(host),
            '--authenticationDatabase', 'admin',
            '-u', str(USER),
            '-p', str(PASSWORD),
            '-d', str(db),
            '-o', file,
        ]

        print("正在备份数据库:{0}\n".format(db))
        try:
            # os.system never raised, so failures used to go unnoticed; the
            # exit code is checked explicitly instead. The command itself is
            # kept out of the log because it contains the password.
            returncode = subprocess.call(command)
        except OSError as e:
            self.logger.warning("数据库备份失败,原因:{0}".format(e))
            return
        if returncode != 0:
            self.logger.warning("数据库备份失败,原因:{0}".format(
                "mongodump exit code {0}".format(returncode)))
            return

        print("数据库{0}备份完毕!".format(db))
Example #15
0
    def read_excel(self, name, **kwargs):
        """
        Read ``<path>/<name>.xlsx`` with ``pd.read_excel``.

        Any exception is caught and printed, in which case None is returned.

        :param name: Excel file name without the ``.xlsx`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('EXCEL_DIR')
        :return: the result of ``pd.read_excel``, or None on failure
        """
        excel_dir = kwargs.get('path', DIR_dict.get('EXCEL_DIR'))
        excel_file = os.path.join(excel_dir, name + '.xlsx')

        try:
            frame = pd.read_excel(excel_file)
        except Exception as err:
            print(err)
            return None
        return frame
Example #16
0
    def read_h5(self, name, **kwargs):
        """
        Open ``<path>/<name>.h5`` read-only with h5py.

        The returned ``h5py.File`` is left open; closing it is up to the
        caller. Any exception is caught and printed, in which case None is
        returned.

        :param name: HDF5 file name without the ``.h5`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('H5_DIR')
        :return: the opened ``h5py.File``, or None on failure
        """
        import h5py

        h5_dir = kwargs.get('path', DIR_dict.get('H5_DIR'))
        h5_file = os.path.join(h5_dir, name + '.h5')

        try:
            return h5py.File(h5_file, 'r')
        except Exception as err:
            print(err)
Example #17
0
    def write_h5(self, data, labels, name, **kwargs):
        """
        Write ``data`` and ``labels`` into ``<path>/<name>.h5``.

        The file is opened in 'w' mode and the values are stored under the
        keys "data" and "labels". Any exception is caught and printed.

        :param data: value stored under the key "data"
        :param labels: value stored under the key "labels"
        :param name: HDF5 file name without the ``.h5`` extension
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('H5_DIR')
        :return: None
        """
        import h5py

        h5_dir = kwargs.get('path', DIR_dict.get('H5_DIR'))
        h5_file = os.path.join(h5_dir, name + '.h5')

        try:
            with h5py.File(h5_file, 'w') as store:
                # Each value goes under its own top-level key.
                for key, value in (('data', data), ('labels', labels)):
                    store[key] = value
        except Exception as err:
            print(err)
Example #18
0
    def write_txt(self, data, name, **kwargs):
        """
        Write every item of ``data`` as one line of ``<path>/<name>.txt``.

        The file is opened in text mode with the platform default encoding and
        a newline is written after each item. Any exception is caught and
        printed.

        :param data: iterable of strings
        :param name: text file name without the ``.txt`` extension
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('TXT_DIR')
        :return: None
        """
        txt_dir = kwargs.get('path', DIR_dict.get('TXT_DIR'))
        txt_file = os.path.join(txt_dir, name + '.txt')

        try:
            with open(txt_file, 'w') as sink:
                emit = sink.write
                for entry in data:
                    emit(entry)
                    emit('\n')
        except Exception as err:
            print(err)
Example #19
0
    def Restore(self, db, host, **kwargs):
        """
        Restore database ``db`` on ``host`` from ``<path>/mongo/<db>`` with
        ``mongorestore``.

        Equivalent command line:
        mongorestore -h <host> --authenticationDatabase admin -u <user>
        -p <password> -d <db> --dir <path>/mongo/<db>

        The tool is started without a shell, one list element per argument, so
        credentials or paths containing spaces or shell metacharacters are
        passed through unchanged. If the tool cannot be started or exits with
        a non-zero code, a warning is logged and the completion message is not
        printed.

        :param db: name of the database to restore
        :param host: MongoDB host, passed to ``-h``
        :param kwargs: ``path`` - backup root, defaults to
            DIR_dict.get('BACKUP_DIR'); ``USER`` / ``PASSWORD`` - credentials,
            default to ``self.user`` / ``self.passwd``
        :return: None
        """
        import subprocess

        path = kwargs.get('path', DIR_dict.get('BACKUP_DIR'))
        file = os.path.join(path, 'mongo', str(db))

        USER = kwargs.get('USER', self.user)
        PASSWORD = kwargs.get('PASSWORD', self.passwd)

        command = [
            'mongorestore',
            '-h', str(host),
            '--authenticationDatabase', 'admin',
            '-u', str(USER),
            '-p', str(PASSWORD),
            '-d', str(db),
            '--dir', file,
        ]

        print("正在恢复数据库:{0}\n".format(db))
        try:
            # os.system never raised, so failures used to go unnoticed; the
            # exit code is checked explicitly instead. The command itself is
            # kept out of the log because it contains the password.
            returncode = subprocess.call(command)
        except OSError as e:
            self.logger.warning("数据库恢复失败,原因:{0}".format(e))
            return
        if returncode != 0:
            self.logger.warning("数据库恢复失败,原因:{0}".format(
                "mongorestore exit code {0}".format(returncode)))
            return

        print("数据库{0}恢复完毕!".format(db))
Example #20
0
    def read_json(self, name, **kwargs):
        """
        Read and parse the UTF-8 JSON file ``<path>/<name>.json``.

        Any exception is caught and printed, in which case None is returned.

        :param name: JSON file name without the ``.json`` extension
        :param kwargs: ``path`` - directory, defaults to
            DIR_dict.get('JSON_DIR')
        :return: the parsed JSON value, or None on failure
        """

        path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
        name = name + '.json'
        file = os.path.join(path, name)

        # The former first attempt, json.load(<path string>), could never
        # succeed because json.load needs a file object; only the real read
        # path is kept and the bare ``except:`` that hid the failure is gone.
        try:
            with open(file, 'rb') as f:
                content = f.read().decode("utf-8")
            return json.loads(content)
        except Exception as e:
            print(e)
Example #21
0
    def ExportCollection(self, collection, name, host='127.0.0.1', **kwargs):
        """
        Export a collection of the current database (``self.db.name``) to
        ``<path>/<name>.json`` with ``mongoexport``.

        The tool is started without a shell, one list element per argument, so
        credentials or paths containing spaces or shell metacharacters are
        passed through unchanged. If the tool cannot be started or exits with
        a non-zero code, a warning is logged.

        :param collection: name of the collection to export
        :param name: output file name without the ``.json`` extension
        :param host: MongoDB host, passed to ``-h``
        :param kwargs: ``path`` - target directory, defaults to
            DIR_dict.get('JSON_DIR'); ``USER`` / ``PASSWORD`` - credentials,
            default to ``self.user`` / ``self.passwd``
        :return: None
        """
        import subprocess

        path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
        file = os.path.join(path, name + '.json')
        USER = kwargs.get('USER', self.user)
        PASSWORD = kwargs.get('PASSWORD', self.passwd)

        self.logger.warning(
            "正在将当前数据库的collection:{0}导出,形成json文件".format(collection))

        command = [
            'mongoexport',
            '-h', str(host),
            '--authenticationDatabase', 'admin',
            '-u', str(USER),
            '-p', str(PASSWORD),
            '-d', str(self.db.name),
            '-c', str(collection),
            '-o', file,
        ]
        try:
            # os.system never raised, so failures used to go unnoticed; the
            # exit code is checked explicitly instead. The command itself is
            # kept out of the log because it contains the password.
            returncode = subprocess.call(command)
        except OSError as e:
            self.logger.warning("集合导出失败,原因:{0}".format(e))
            return
        if returncode != 0:
            self.logger.warning("集合导出失败,原因:{0}".format(
                "mongoexport exit code {0}".format(returncode)))
Example #22
0
    def ImportCollection(self, collection, name, host, **kwargs):
        """
        Import ``<path>/<name>.json`` into a collection of the current
        database (``self.db.name``) with ``mongoimport``.

        The tool is started without a shell, one list element per argument, so
        credentials or paths containing spaces or shell metacharacters are
        passed through unchanged. If the tool cannot be started or exits with
        a non-zero code, a warning is logged.

        :param collection: name of the collection to create or fill
        :param name: input file name without the ``.json`` extension
        :param host: MongoDB host, passed to ``-h``
        :param kwargs: ``path`` - source directory, defaults to
            DIR_dict.get('JSON_DIR'); ``USER`` (default 'root') and
            ``PASSWORD`` - credentials
        :return: None
        """
        import subprocess

        path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
        file = os.path.join(path, name + '.json')
        # NOTE(review): these defaults are hard-coded literals, not
        # self.user / self.passwd -- confirm that is intended.
        USER = kwargs.get('USER', 'root')
        PASSWORD = kwargs.get('PASSWORD', '密码')

        self.logger.warning(
            "正在往当前数据库中导入json文件,形成collection:{0}".format(collection))

        command = [
            'mongoimport',
            '-h', str(host),
            '--authenticationDatabase', 'admin',
            '-u', str(USER),
            '-p', str(PASSWORD),
            '-d', str(self.db.name),
            '-c', str(collection),
            '--file', file,
        ]
        try:
            # os.system never raised, so failures used to go unnoticed; the
            # exit code is checked explicitly instead. The command itself is
            # kept out of the log because it contains the password.
            returncode = subprocess.call(command)
        except OSError as e:
            self.logger.warning("集合导入失败,原因:{0}".format(e))
            return
        if returncode != 0:
            self.logger.warning("集合导入失败,原因:{0}".format(
                "mongoimport exit code {0}".format(returncode)))
Example #23
0
    def import_private_data(self, **kwargs):
        """
        Load one of several example datasets, selected by a truthy keyword
        flag.

        The flags are checked in the order below; the first truthy one wins:

        - ``province_expense``: TXT_DIR/city.txt (gbk, comma separated) ->
          ``(rows_of_floats, first_column_values)``
        - ``online_times``: TXT_DIR/online_times.txt -> ndarray reshaped to
          ``(-1, 2)`` holding ``(start_hour, online_time)`` per MAC address
          (the last record of a MAC wins)
        - ``image_segmentation``: picture at ``kwargs["path"]`` ->
          ``(matrix of [r, g, b] / 256 rows, m, n)`` with ``m, n = img.size``
        - ``posture``: TXT_DIR/posture/{A..E} ->
          ``X_train, X_test, y_train, y_test``
        - ``stock``: CSV_DIR/000777.csv -> ``x, y``
        - ``house_price``: TXT_DIR/prices.txt -> ``X, y``
        - ``traffic``: CSV_DIR/traffic.csv -> ``X, y``
        - ``handwriting``: TXT_DIR/digits ->
          ``train_dataSet, test_dataSet, train_hwLabels, test_hwLabels``

        :param kwargs: one of the flags above; ``path`` for
            ``image_segmentation``
        :return: the dataset described above, or None when no flag is truthy
        """
        if kwargs.get("province_expense"):

            def loadData(filePath):
                retData = []
                retCityName = []
                # Read-only mode is sufficient ('r+' also demanded write
                # permission) and the context manager closes the file.
                with open(filePath, 'r', encoding='gbk') as fr:
                    for line in fr:
                        items = line.strip().split(",")
                        retCityName.append(items[0])
                        retData.append([float(item) for item in items[1:]])
                return retData, retCityName

            path = os.path.join(DIR_dict.get("TXT_DIR"), "city.txt")
            return loadData(path)

        elif kwargs.get("online_times"):

            def loadData(filePath):
                mac2id = dict()
                onlinetimes = []
                with open(filePath, encoding='utf-8') as f:
                    for line in f:
                        fields = line.split(',')
                        mac = fields[2]
                        onlinetime = int(fields[6])
                        # Hour part of a "<date> <HH>:<MM>..." timestamp.
                        starttime = int(
                            fields[4].split(' ')[1].split(':')[0])
                        if mac not in mac2id:
                            mac2id[mac] = len(onlinetimes)
                            onlinetimes.append((starttime, onlinetime))
                        else:
                            # Store a plain tuple like the first occurrence;
                            # wrapping it in a list made the rows ragged.
                            onlinetimes[mac2id[mac]] = (starttime, onlinetime)
                real_X = np.array(onlinetimes).reshape((-1, 2))
                return real_X

            path = os.path.join(DIR_dict.get("TXT_DIR"), "online_times.txt")
            return loadData(path)

        elif kwargs.get("image_segmentation"):

            def loadData(filePath):
                data = []
                # The context manager closes the file even if decoding fails.
                with open(filePath, 'rb') as f:
                    img = image.open(f)
                    m, n = img.size
                    for i in range(m):
                        for j in range(n):
                            x, y, z = img.getpixel((i, j))[:3]
                            data.append([x / 256.0, y / 256.0, z / 256.0])
                # np.asmatrix is what the np.mat alias pointed to; the alias
                # itself no longer exists in NumPy 2.
                return np.asmatrix(data), m, n

            return loadData(filePath=kwargs.get("path"))

        elif kwargs.get("posture"):

            def load_dataset(feature_paths, label_paths):
                """
                Read the listed feature files and label files and return them
                concatenated.

                :param feature_paths: paths of the feature files
                :param label_paths: paths of the label files
                :return: (feature, label) arrays
                """
                # Start from empty arrays: 41 feature columns, 1 label column.
                feature = np.ndarray(shape=(0, 41))
                label = np.ndarray(shape=(0, 1))

                for file in feature_paths:
                    # Comma separated, "?" marks a missing value, no header.
                    df = pd.read_table(file,
                                       delimiter=',',
                                       na_values='?',
                                       header=None)
                    # Fill missing values with the column mean. The marker
                    # must be np.nan; the string "NaN" is not the NaN that
                    # pandas puts into the frame.
                    imp = SimpleImputer(missing_values=np.nan, strategy="mean")
                    imp.fit(df)
                    df = imp.transform(df)
                    # Append the new rows to the feature set.
                    feature = np.concatenate((feature, df))

                for file in label_paths:
                    # Label files have no header either.
                    df = pd.read_table(file, header=None)
                    # Append the new rows to the label set.
                    label = np.concatenate((label, df))

                return feature, label

            # Collect the data file paths.
            feature_paths = []
            label_paths = []
            posture_dir = os.path.join(DIR_dict.get("TXT_DIR"), "posture")
            paths = [
                os.path.join(posture_dir, letter)
                for letter in ["A", "B", "C", "D", "E"]
            ]
            for path in paths:
                for file in os.listdir(path):
                    if ".feature" in file:
                        feature_paths.append(os.path.join(path, file))
                    elif ".label" in file:
                        label_paths.append(os.path.join(path, file))
            # NOTE(review): the split below presumably expects exactly one
            # feature and one label file per directory -- confirm.
            # The first 4 files form the training set.
            X_train, y_train = load_dataset(feature_paths=feature_paths[:4],
                                            label_paths=label_paths[:4])
            # The remaining files form the test set.
            X_test, y_test = load_dataset(feature_paths=feature_paths[4:],
                                          label_paths=label_paths[4:])
            return X_train, X_test, y_train, y_test

        elif kwargs.get("stock"):
            # Column 0 is parsed as a date and used as the index.
            data = pd.read_csv(os.path.join(DIR_dict.get("CSV_DIR"),
                                            '000777.csv'),
                               encoding='gbk',
                               parse_dates=[0],
                               index_col=0)
            # Sort ascending by date, in place. ``axis`` is passed by keyword
            # because pandas 2 no longer accepts it positionally.
            data.sort_index(axis=0, ascending=True, inplace=True)

            # dayfeature: number of preceding days used per sample.
            # featurenum: 5 features for each of those days.
            # x: per sample the 5 * 150 historic values plus the opening
            #    price of the current day as one extra feature.
            # y: 1 when the current day closes at or above its opening
            #    price, otherwise 0.
            # Only rows that have 150 preceding days can become samples,
            # hence data.shape[0] - dayfeature samples.
            dayfeature = 150
            featurenum = 5 * dayfeature
            samples = data.shape[0] - dayfeature
            x = np.zeros((samples, featurenum + 1))
            y = np.zeros((samples))

            feature_columns = ['收盘价', '最高价', '最低价', '开盘价', '成交量']
            for i in range(samples):
                window = data[i:i + dayfeature][feature_columns]
                x[i, 0:featurenum] = np.array(window).reshape((1, featurenum))
                today = data.iloc[i + dayfeature]
                x[i, featurenum] = today['开盘价']
                y[i] = 1 if today['收盘价'] >= today['开盘价'] else 0
            return x, y

        elif kwargs.get("house_price"):
            X = []
            y = []

            with open(os.path.join(DIR_dict.get("TXT_DIR"), 'prices.txt'),
                      'r') as file:
                for line in file:
                    items = line.strip().split(',')
                    X.append(int(items[0]))
                    y.append(int(items[1]))

            # X becomes a column vector, y stays one-dimensional.
            length = len(X)
            X = np.array(X).reshape([length, 1])
            y = np.array(y)
            return X, y

        elif kwargs.get("traffic"):
            data = np.genfromtxt(os.path.join(DIR_dict.get("CSV_DIR"),
                                              'traffic.csv'),
                                 delimiter=',',
                                 skip_header=True)
            # Columns 1-4 are the features, column 5 is the target.
            X = data[:, 1:5]
            y = data[:, 5]
            return X, y

        elif kwargs.get("handwriting"):

            def img2vector(fileName):
                # Flatten the first 32 characters of the first 32 lines into
                # a vector of 1024 ints.
                retMat = np.zeros([1024], int)
                with open(fileName) as file:
                    lines = file.readlines()
                    for i in range(32):
                        for j in range(32):
                            retMat[i * 32 + j] = lines[i][j]
                return retMat

            def readDataSet(path):
                fileList = os.listdir(path)
                numFiles = len(fileList)
                dataSet = np.zeros([numFiles, 1024], int)
                # Labels are stored as plain digits, not one-hot vectors.
                hwLabels = np.zeros([numFiles])
                for i, fileName in enumerate(fileList):
                    # The label is the file name part before the first "_".
                    hwLabels[i] = int(fileName.split('_')[0])
                    dataSet[i] = img2vector(os.path.join(path, fileName))
                return dataSet, hwLabels

            path = os.path.join(DIR_dict.get("TXT_DIR"), "digits")
            train_dataSet, train_hwLabels = readDataSet(
                path=os.path.join(path, 'trainingDigits'))
            test_dataSet, test_hwLabels = readDataSet(
                path=os.path.join(path, 'testDigits'))
            return train_dataSet, test_dataSet, train_hwLabels, test_hwLabels
Example #24
0
 def job(self, urls):
     """
     Download every URL in ``urls`` with a ``YouGet`` client.

     The client is created with ``self.path`` when that is truthy, otherwise
     with DIR_dict.get('RB_DIR').

     :param urls: iterable of URLs, downloaded one after another
     :return: None
     """
     target_dir = self.path or DIR_dict.get('RB_DIR')
     downloader = YouGet(path=target_dir)
     for link in urls:
         downloader.download(link)