Example #1
class ImportCSV(SparkOperator):
    '''Import CSV-format data.'''

    OP_NAME = 'import-csv'
    OP_CATEGORY = 'data-import'

    def __init__(self):
        super(ImportCSV, self).__init__()
        self.op_input_num = 0
        self.op_output_num = 1
        self.op_status = OperatorStatus.INIT
        self.op_script_location = 'resources/spark_operators/data_import/import_csv.py'
        self.op_backend = 'spark'

        self.input_path = None
        self.delimiter = None

    def init_operator(self, op_json_param):
        self.op_json_param = op_json_param
        self.op_running_mode = self.op_json_param.get('running-mode', 'script')
        self.op_local = bool(self.op_json_param.get('local', True))

        if self.op_local:
            self.op_script_location = os.getcwd() + '/' + self.op_script_location

        self.op_working_directory = self.op_json_param.get('op-working-directory')
        self.op_logger = Logger(self.op_working_directory + '/log/import-csv_' +
                                str(self.op_json_param['op-index']))

        self.input_path = self.op_json_param['input-path']
        self.delimiter = self.op_json_param.get('delimiter', ',')

    def run_function_mode(self):
        return self.op_status

    def run_script_mode(self):
        run_command = 'spark-submit --master '
        if self.op_local:
            run_command = run_command + 'local[2] ' 

        self.op_result.append(self.op_working_directory + 'output/' + self.op_json_param['op-index'] + '-output')
        run_command = run_command + self.op_script_location + ' ' + self.input_path + ' ' + self.op_result[0] + ' ' + self.delimiter
        sub_proc = subprocess.Popen(run_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        
        for line in iter(sub_proc.stdout.readline, b''):
            self.op_logger.info(line)

        sub_proc.stdout.close()
        sub_proc.wait()
        self.op_status = sub_proc.returncode
        return self.op_status

    def azkaban_script(self):
        run_command = 'spark-submit --master '
        if self.op_local:
            run_command = run_command + 'local[2] '
            
        self.op_result.append(self.op_working_directory + 'output/' + self.op_json_param['op-index'] + '-output')
        run_command = run_command + self.op_script_location + ' ' + self.input_path + ' ' + self.op_result[0] + ' ' + self.delimiter
        return run_command
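For context, a minimal sketch of the op_json_param dictionary that init_operator above reads, inferred from its key lookups. Every value is an illustrative placeholder, and it assumes ImportCSV and its SparkOperator base class are importable from this project.

import_csv_params = {
    'running-mode': 'script',            # defaults to 'script' when absent
    'local': True,                       # run spark-submit with --master local[2]
    'op-working-directory': '/tmp/jobs/job-001/',  # placeholder; 'output/' is appended directly
    'op-index': '0',                     # a string; it is concatenated into log/output paths
    'input-path': '/data/input.csv',     # placeholder CSV location
    'delimiter': ',',                    # defaults to ',' when absent
}

op = ImportCSV()
op.init_operator(import_csv_params)
print(op.azkaban_script())   # the spark-submit command the operator would run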
Example #2
    class TestNotifier(object):
        def __init__(self):
            self.service_name = 'test_service'
            self.meta = meta
            self.request_body = {
                'jobId': 1,
                'stepId': 1,
                'batchId': 0,
                'retry': 0,
                'status': 3,
                'message': 'RUNNING',
                'groupName': 'TestGroupName'
            }
            self.is_notifiable = True
            #self.is_notifiable = False
            self.logger = Logger(log_level="info",
                                 vendor_key=20,
                                 retailer_key=100)
            self.logger.info(
                'tttttttttttttttteeeeeeeeeeeeeeeeeeessssssssssstttttttttttttt')

        @Notifier
        def test(self):
            print('yoyo')

        @Notifier
        def test_failure(self):
            raise RuntimeError('Calling error.')

        def main(self):
            self.test()
            #self.test_failure()
            print('done')
Example #3
class BaiDuGet(unittest.TestCase):
    '''Baidu GET test, with headers'''
    def setUp(self):
        self.baidu = Baidu("case.xls", "baidu")
        self.logger = Logger("FOX",
                             cmd_log_level=logging.INFO,
                             file_log_level=logging.INFO)

    def test_baidu_get(self):
        try:
            status_code = self.baidu.baidu_get()
            self.assertEqual(status_code, 200)
            self.logger.info("baidu get success")
        except BaseException as e:
            self.logger.error("baidu get fail")
            raise

    def tearDown(self):
        pass
Example #4
class AppEE(object):
    def __init__(self,
                 service=None,
                 endpoint=None,
                 api_key=None,
                 export=EXPORT_CLIPBOARD):
        self.export = export
        self.api_request = ApiRequest(service=service,
                                      endpoint=endpoint,
                                      api_key=api_key)
        self.logger = Logger().get_logger()

    def __export_app(self, app_uuid, text, target):
        try:
            if self.export == EXPORT_CLIPBOARD:
                pyperclip.copy(text)
                self.logger.info(
                    'Your generated script is copied to clipboard!')
                # mb.showinfo('Info', 'Your generated script is copied to clipboard!')

            elif self.export == EXPORT_FILE:
                file_path = ''

                if target == TARGET_INCREMENTAL:
                    file_path = os.path.join(
                        os.getcwd(), f'update_app_parameters_{app_uuid}.sql')
                elif target == TARGET_FINAL:
                    file_path = os.path.join(os.getcwd(),
                                             f'final_app_{app_uuid}.sql')
                else:
                    raise Exception(f"Unknown export target '{target}'.")

                with open(file_path, 'w') as f:
                    f.write(text)
                    self.logger.info(
                        f'Your generated script is written to the file {file_path}.'
                    )
                    # mb.showinfo('Info', f'Your generated script is written to the file {file_path}.')
        except Exception as e:
            self.logger.exception('Failed to export your script.', exc_info=e)
            # mb.showerror('Error', f'Failed to export your script. {repr(e)}')

    def __export_all_apps(self, text):
        try:
            if self.export == EXPORT_CLIPBOARD:
                pyperclip.copy(text)
                self.logger.info(
                    'Your generated script is copied to clipboard!')

            elif self.export == EXPORT_FILE:
                file_path = 'insert_all_apps.sql'

                with open(file_path, 'w') as f:
                    f.write(text)
                    self.logger.info(
                        f'Your generated script is written to the file {file_path}.'
                    )
        except Exception as e:
            self.logger.exception('Failed to export your script.', exc_info=e)

    def get_app(self, uuid):
        path = f'timeSeriesGroups/apps/{uuid}'
        return self.api_request.get(path=path)

    def get_all(self):
        path = 'timeSeriesGroups/apps/all'
        return self.api_request.get(path=path)

    def is_super_user(self):
        path = 'superusers/check'
        res = self.api_request.get(path=path)
        return res['value']

    def sql_update_parameters(self, app_uuid):
        try:
            app = self.get_app(uuid=app_uuid)
            parameters = json.dumps(app['parameters']).replace("'", "''")

            res = f"UPDATE time_series_group_app SET parameters = '{parameters}'::jsonb WHERE time_series_group_app_uuid = '{app_uuid}'::uuid;"

            self.logger.info('----- UPDATE APP PARAMETERS STATEMENT -----')
            self.logger.info(res)

            self.__export_app(app_uuid, res, TARGET_INCREMENTAL)

        except Exception as e:
            self.logger.exception(f'Failed to get app {app_uuid}.', exc_info=e)

    def sql_insert_final(self, app_uuid):
        try:
            app = self.get_app(uuid=app_uuid)

            settings = {}
            if 'description' in app:
                settings['description'] = app['description']
            if 'show_result' in app:
                settings['show_result'] = app['show_result']
            if 'alertable' in app:
                settings['alertable'] = app['alertable']
            if 'trainable' in app:
                settings['trainable'] = app['trainable']
            if 'inferenceable' in app:
                settings['inferenceable'] = app['inferenceable']

            settings_str = json.dumps(settings).replace("'", "''")

            parameters_str = json.dumps(app['parameters']).replace("'", "''")

            res = "INSERT INTO time_series_group_app(time_series_group_app_uuid, time_series_group_app_name, display_name, settings, parameters, endpoint_meta, endpoint_train, endpoint_inference, stage, type) VALUES (" + \
                  f"'{app_uuid}', '{app['appName']}', '{app['displayName']}', '{settings_str}', '{parameters_str}', '{app['endpointMeta']}', '{app['endpointTrain']}', '{app['endpointInference']}', 'Public', 'Internal'" + \
                  ");"

            self.logger.info('----- INSERT APP STATEMENT -----')
            self.logger.info(res)

            self.__export_app(app_uuid, res, TARGET_FINAL)
        except Exception as e:
            self.logger.exception(f'Failed to get app {app_uuid}.', exc_info=e)

    def sql_insert_all(self, only_public=True):
        try:
            if not self.is_super_user():
                raise PermissionError(
                    'You do not have the permission to generate script for all apps.'
                )

            apps = self.get_all()

            if only_public:
                apps = filter(lambda app: app['stage'] == 'Public',
                              apps['value'])

            res = "TRUNCATE TABLE time_series_group_app;" + os.linesep
            res += "INSERT INTO time_series_group_app(time_series_group_app_uuid, time_series_group_app_name, display_name, settings, parameters, endpoint_meta, endpoint_train, endpoint_inference, stage, type) VALUES "

            for app in apps:
                settings = {}
                if 'description' in app:
                    settings['description'] = app['description']
                if 'show_result' in app:
                    settings['show_result'] = app['show_result']
                if 'alertable' in app:
                    settings['alertable'] = app['alertable']
                if 'trainable' in app:
                    settings['trainable'] = app['trainable']
                if 'inferenceable' in app:
                    settings['inferenceable'] = app['inferenceable']

                settings_str = json.dumps(settings).replace("'", "''")

                parameters_str = json.dumps(app['parameters']).replace(
                    "'", "''")

                stmt = os.linesep + f"('{app['appId']}', '{app['appName']}', '{app['displayName']}', '{settings_str}', '{parameters_str}', '{app['endpointMeta']}', '{app['endpointTrain']}', '{app['endpointInference']}', 'Public', 'Internal'),"

                res += stmt

            res = res.rstrip(',') + ';'

            self.logger.info('----- INSERT APPS STATEMENT -----')
            self.logger.info(res)

            self.__export_all_apps(res)

        except Exception as e:
            self.logger.exception('Failed to generate script for all apps.',
                                  exc_info=e)
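A hedged usage sketch for AppEE. The service name, endpoint, API key, and UUID below are placeholders, and it assumes the EXPORT_FILE constant referenced in the constructor default is defined alongside the class.

app_ee = AppEE(service='timeSeries',                 # placeholder service name
               endpoint='https://example.com/api',   # placeholder endpoint
               api_key='***',                        # placeholder credential
               export=EXPORT_FILE)

# Writes update_app_parameters_<uuid>.sql into the current working directory.
app_ee.sql_update_parameters(app_uuid='00000000-0000-0000-0000-000000000000')

# Requires superuser rights on the API side; writes insert_all_apps.sql.
app_ee.sql_insert_all(only_public=True)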
Example #5
def train_v1(opts):

    ## log

    ## devices
    devices = DeviceInfo.get_devices(
        [int(i) for i in opts.processors.split(',')])
    logger.info('devices: {}'.format(devices))

    ## metric
    rmse = RMSE()
    loss_metric = [BaseMetric(name='loss')]
    loss_metric.append(BaseMetric('nose'))
    loss_metric.append(BaseMetric('delta'))
    loss_metric.append(BaseMetric('merge'))

    ## network
    # net = resnet18_v2(classes=136)
    net = get_mobilenet_v2(multiplier=1.0, classes=opts.num_classes)
    if opts.pretrained_name is not None and opts.pretrained_dir is not None:
        logger.info('loading pre-trained {} ...'.format(
            os.path.join(opts.pretrained_dir, opts.pretrained_name)))
        net.load_params(os.path.join(opts.pretrained_dir,
                                     opts.pretrained_name),
                        ctx=devices,
                        allow_missing=True)
        logger.info('load pre-trained model complete.')
    else:
        # net.hybridize()
        net.initialize(init=mx.init.Normal(sigma=0.01),
                       force_reinit=True,
                       ctx=devices)
        logger.info('initial params with random data.')
    net.collect_params().reset_ctx(devices)
    logger.info('net: {}'.format(net))

    ## loss
    loss_funcs = []
    # loss_funcs.append(mx.gluon.loss.SoftmaxCrossEntropyLoss())
    loss_funcs.append(mx.gluon.loss.L1Loss())
    # loss_funcs.append(L1Loss())
    # loss_funcs.append(mx.gluon.loss.L2Loss())
    # loss_funcs.append(L2Loss())

    ## optimizer
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=3,
                                                   factor=0.1,
                                                   base_lr=opts.learning_rate,
                                                   stop_factor_lr=1e-6)

    ## trainer
    trainer = mx.gluon.Trainer(
        net.collect_params(),
        opts.optimizer,
        {
            'learning_rate': opts.learning_rate,
            # 'momentum': opts,
            'wd': opts.weight_decay
            # 'lr_scheduler': lr_scheduler
        })

    ## datasets
    logger.info('loading datasets ...')
    train_iter = RecDataIterV1(rec_dir=opts.rec_dir,
                               rec_prefix=opts.rec_prefix,
                               batch_size=opts.batch_size,
                               shuffle=opts.shuffle)
    logger.info('load datasets complete.')

    params = net.collect_params()
    param_names = params.keys()

    logger.info('starting training ...',
                show_type=logger.LOG_STYLE.DEFAULT,
                forground=logger.LOG_FRONT_COLOR.RED,
                background=logger.LOG_BACK_COLOR.DEFAULT)
    start_time = time.time()
    global_step = 0
    for epoch in range(opts.min_epoch, opts.max_epoch):
        for idx_step, batch_data in enumerate(iter(train_iter)):
            datas = mx.gluon.utils.split_and_load(batch_data.data[0],
                                                  ctx_list=devices,
                                                  batch_axis=0)
            targets = mx.gluon.utils.split_and_load(batch_data.label[0],
                                                    ctx_list=devices,
                                                    batch_axis=0)
            losses = []
            with mx.autograd.record():
                for datai, targeti in zip(datas, targets):
                    # print('[===]  datai: {}  {}  {}'.format(datai.shape, datai.dtype, type(datai)))
                    # print('[===]  targeti: {}  {}  {}'.format(targeti.shape, targeti.dtype, type(targeti)))
                    targeti = targeti / 128.0
                    datai = (datai - 127.5) / 128.0
                    predicts = net(datai)

                    # print('[===] targeti min-max: {}  {}'.format(mx.nd.max(targeti[0, :]).asnumpy()[0], mx.nd.min(targeti[0, :]).asnumpy()[0]))
                    # print('[===] predicts min-max: {}  {}'.format(mx.nd.max(predicts[0, :]).asnumpy()[0], mx.nd.min(predicts[0, :]).asnumpy()[0]))
                    mse_loss = loss_funcs[0](targeti, predicts)

                    if False:
                        points = targeti.reshape((-1, 68, 2))
                        points = points.asnumpy() * 128.0
                        image = datai[0, :, :, :].asnumpy() * 128.0 + 127.5
                        image = np.transpose(image, (1, 2, 0))
                        image = image.astype(np.uint8)
                        print('image: {}  {}'.format(image.shape, image.dtype))
                        import cv2
                        points = points[0, :, :]
                        for j in range(68):
                            point = points[j, :]
                            image = cv2.circle(img=image,
                                               center=(int(round(point[0])),
                                                       int(round(point[1]))),
                                               radius=1,
                                               color=(255, 0, 0),
                                               thickness=-1)
                        import matplotlib.pyplot as plt
                        plt.imshow(image.get())  ## cv2.UMat --> np.array
                        # plt.imshow(image)  ## cv2.UMat --> np.array
                        plt.show()

                        sys.exit(0)

                    losses.extend(mse_loss)

                    ## update metrics
                    loss_metric[0].update(mse_loss.shape[0],
                                          mse_loss.asnumpy().sum())

            mx.autograd.backward(losses)
            # for loss in losses:
            #     loss.backward()
            #     mx.autograd.backward(loss)
            trainer.step(batch_data.data[0].shape[0])
            mx.nd.waitall()

            ## log
            elapse_time = time.time() - start_time
            samples_per_second = 0
            if global_step > 30:
                start_time = time.time()
                samples_per_second = int(
                    opts.batch_size * opts.checkpoint_interval / elapse_time)
            if (global_step + 1) % opts.log_interval == 0:
                logger.info(
                    '[{}/{}][{}/{}] [loss: {:.6f} / {:.6f}]  [{} samples/s]  [lr: {:.10f}]'
                    .format(epoch, opts.max_epoch - opts.min_epoch, idx_step,
                            int(train_iter.max_index / opts.batch_size),
                            loss_metric[0].get_avg()[0],
                            loss_metric[0].get_avg()[1], samples_per_second,
                            lr_scheduler.base_lr))

            ## update metrics

            ## update global step
            global_step += 1

        ## update trainer
        train_iter.reset()

        ## update learning rate
        lr_scheduler(epoch - opts.min_epoch)
        trainer.set_learning_rate(lr_scheduler.base_lr)
        print('lr: ', lr_scheduler.base_lr, epoch - opts.min_epoch)

        ## save checkpoint
        if (epoch + 1) % opts.checkpoint_interval == 0:
            checkpoint_path = os.path.join(
                opts.checkpoint_dir,
                opts.checkpoint_prefix + '{}'.format(epoch))
            logger.info(
                'begin save checkpoints {} ...'.format(checkpoint_path),
                show_type=logger.LOG_STYLE.DEFAULT,
                forground=logger.LOG_FRONT_COLOR.RED,
                background=logger.LOG_BACK_COLOR.DEFAULT)
            net.save_params(filename=checkpoint_path)
            logger.info('complete save checkpoints.',
                        show_type=logger.LOG_STYLE.DEFAULT,
                        forground=logger.LOG_FRONT_COLOR.RED,
                        background=logger.LOG_BACK_COLOR.DEFAULT)
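For reference, a sketch of the opts object that train_v1 expects, reconstructed from the attributes it reads. Every value below is a placeholder, and running it would still require DeviceInfo, RecDataIterV1, get_mobilenet_v2, and the project logger.

from argparse import Namespace

opts = Namespace(
    processors='0,1',           # device ids handed to DeviceInfo.get_devices
    num_classes=136,            # output size of get_mobilenet_v2 (68 landmarks x 2)
    pretrained_dir=None,        # set both pretrained_* fields to load an existing model
    pretrained_name=None,
    learning_rate=1e-3,
    optimizer='sgd',            # placeholder optimizer name
    weight_decay=1e-4,
    rec_dir='data/rec',         # RecDataIterV1 inputs
    rec_prefix='train',
    batch_size=64,
    shuffle=True,
    min_epoch=0,
    max_epoch=30,
    log_interval=10,
    checkpoint_interval=5,
    checkpoint_dir='checkpoints',
    checkpoint_prefix='mobilenet_v2_',
)
# train_v1(opts)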
Example #6
def info(msg: str):
    """
    log Info
    :param msg:
    """
    Logger.info(msg)
Example #7
def train_v1(opts):

    ## log

    ## devices
    devices = DeviceInfo.get_devices(
        [int(i) for i in opts.processors.split(',')])
    logger.info('devices: {}'.format(devices))

    ## metric
    rmse = RMSE()
    loss_metric = [BaseMetric(name='loss')]
    loss_metric.append(BaseMetric('l1'))
    loss_metric.append(BaseMetric('l2'))
    loss_metric.append(BaseMetric('l3'))
    loss_metric.append(BaseMetric('l4'))
    loss_metric.append(BaseMetric('l5'))
    loss_metric.append(BaseMetric('l6'))
    loss_metric.append(BaseMetric('l7'))

    ## network
    # net = resnet18_v2(classes=136)
    net = get_mobilenet_v2(multiplier=1.0, classes=136)
    # net.hybridize()
    net.initialize(init=mx.init.Normal(sigma=0.01), force_reinit=True)
    if opts.pretrained_name is not None and opts.pretrained_dir is not None:
        logger.info('loading pre-trained {} ...'.format(
            os.path.join(opts.pretrained_dir, opts.pretrained_name)))
        net.load_params(os.path.join(opts.pretrained_dir,
                                     opts.pretrained_name),
                        ctx=devices,
                        allow_missing=True)
        logger.info('load pre-trained model complete.')
    net.collect_params().reset_ctx(devices)
    logger.info('net: {}'.format(net))

    ## loss
    loss_funcs = []
    # loss_funcs.append(mx.gluon.loss.SoftmaxCrossEntropyLoss())
    loss_funcs.append(mx.gluon.loss.L1Loss())
    # loss_funcs.append(mx.gluon.loss.L2Loss())
    # loss_funcs.append(L2Loss())

    ## optimizer
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=3,
                                                   factor=0.1,
                                                   base_lr=opts.learning_rate)

    ## trainer
    trainer = mx.gluon.Trainer(
        net.collect_params(),
        opts.optimizer,
        {
            'learning_rate': opts.learning_rate,
            # 'momentum': opts,
            'wd': opts.weight_decay
            # 'lr_scheduler': lr_scheduler
        })

    ## datasets
    logger.info('loading datasets ...')
    train_iter = RecDataIterV1(rec_dir=opts.rec_dir,
                               rec_prefix=opts.rec_prefix,
                               batch_size=opts.batch_size,
                               shuffle=opts.shuffle)
    logger.info('load datasets complete.')

    params = net.collect_params()
    param_names = params.keys()

    logger.info('starting training ...',
                show_type=logger.LOG_STYLE.DEFAULT,
                forground=logger.LOG_FRONT_COLOR.RED,
                background=logger.LOG_BACK_COLOR.DEFAULT)
    start_time = time.time()
    global_step = 0
    for epoch in range(opts.min_epoch, opts.max_epoch):
        for idx_step, batch_data in enumerate(iter(train_iter)):
            datas = mx.gluon.utils.split_and_load(batch_data.data[0],
                                                  ctx_list=devices,
                                                  batch_axis=0)
            targets = mx.gluon.utils.split_and_load(batch_data.label[0],
                                                    ctx_list=devices,
                                                    batch_axis=0)
            losses = []
            with mx.autograd.record():
                for datai, targeti in zip(datas, targets):
                    # print('[===]  datai: {}  {}  {}'.format(datai.shape, datai.dtype, type(datai)))
                    # print('[===]  targeti: {}  {}  {}'.format(targeti.shape, targeti.dtype, type(targeti)))
                    datai = datai / 255.0
                    # predicts = net(datai)
                    predicts = net(datai)
                    # logger.info('predicts: {}  {}, {}  {}'.format(
                    #     type(predicts), predicts.shape, mx.nd.max(predicts), mx.nd.min(predicts)
                    # ))

                    targeti = targeti / 128.0
                    # print('[===] targeti min-max: {}  {}'.format(mx.nd.max(targeti[0, :]).asnumpy()[0], mx.nd.min(targeti[0, :]).asnumpy()[0]))
                    # print('[===] predicts min-max: {}  {}'.format(mx.nd.max(predicts[0, :]).asnumpy()[0], mx.nd.min(predicts[0, :]).asnumpy()[0]))
                    mse_loss1 = loss_funcs[0](targeti, predicts[0])
                    mse_loss2 = loss_funcs[0](targeti[:, 0:34], predicts[1])
                    mse_loss3 = loss_funcs[0](targeti[:, 34:44], predicts[2])
                    mse_loss4 = loss_funcs[0](targeti[:, 44:54], predicts[3])
                    mse_loss5 = loss_funcs[0](targeti[:, 54:72], predicts[4])
                    mse_loss6 = loss_funcs[0](targeti[:, 72:84], predicts[5])
                    mse_loss7 = loss_funcs[0](targeti[:, 84:96], predicts[6])
                    mse_loss8 = loss_funcs[0](targeti[:, 96:136], predicts[7])
                    mse_loss = mse_loss1 + mse_loss2 + mse_loss3 + mse_loss4 + mse_loss5 + mse_loss6 + mse_loss7 + mse_loss8
                    losses.extend(mse_loss)
                    # print('target: {}'.format(targeti))
                    # print('predicts: {}'.format(predicts))
                    # print('loss: {}'.format(mse_loss.shape))
                    # print('[===] {}'.format(mse_loss))

                    # correct_count, num_samples = acc(preds=predicts, targets=targeti)

                    ## update metrics
                    loss_metric[0].update(mse_loss.shape[0],
                                          mse_loss.asnumpy().sum())
                    loss_metric[1].update(mse_loss1.shape[0],
                                          mse_loss1.asnumpy().sum())
                    loss_metric[2].update(mse_loss2.shape[0],
                                          mse_loss2.asnumpy().sum())
                    loss_metric[3].update(mse_loss3.shape[0],
                                          mse_loss3.asnumpy().sum())
                    loss_metric[4].update(mse_loss4.shape[0],
                                          mse_loss4.asnumpy().sum())
                    loss_metric[5].update(mse_loss5.shape[0],
                                          mse_loss5.asnumpy().sum())
                    loss_metric[6].update(mse_loss6.shape[0],
                                          mse_loss6.asnumpy().sum())
                    loss_metric[7].update(mse_loss7.shape[0],
                                          mse_loss7.asnumpy().sum())

            mx.autograd.backward(losses)
            # for loss in losses:
            #     loss.backward()
            #     mx.autograd.backward(loss)
            trainer.step(batch_data.data[0].shape[0])
            mx.nd.waitall()

            ## log
            elapse_time = time.time() - start_time
            samples_per_second = 0
            if global_step > 30:
                start_time = time.time()
                samples_per_second = int(
                    opts.batch_size * opts.checkpoint_interval / elapse_time)
            if (global_step + 1) % opts.log_interval == 0:
                logger.info(
                    '[{}/{}][{}/{}] [loss: {:.6f} / {:.6f} / {:.4f} / {:.4f} / {:.4f} / {:.4f} / {:.4f} / {:.4f} / {:.4f}]  [{} samples/s]  [lr: {:.10f}]'
                    .format(epoch, opts.max_epoch - opts.min_epoch, idx_step,
                            int(train_iter.max_index / opts.batch_size),
                            loss_metric[0].get_avg()[0],
                            loss_metric[0].get_avg()[1],
                            loss_metric[1].get_avg()[0],
                            loss_metric[2].get_avg()[0],
                            loss_metric[3].get_avg()[0],
                            loss_metric[4].get_avg()[0],
                            loss_metric[5].get_avg()[0],
                            loss_metric[6].get_avg()[0],
                            loss_metric[7].get_avg()[0], samples_per_second,
                            lr_scheduler.base_lr))

            ## update metrics

            ## update global step
            global_step += 1

        ## update trainer
        train_iter.reset()

        ## update learning rate
        lr_scheduler(epoch - opts.min_epoch)
        trainer.set_learning_rate(lr_scheduler.base_lr)
        print('lr: ', lr_scheduler.base_lr, epoch - opts.min_epoch)

        ## save checkpoint
        if (epoch + 1) % opts.checkpoint_interval == 0:
            checkpoint_path = os.path.join(
                opts.checkpoint_dir,
                opts.checkpoint_prefix + '{}'.format(epoch))
            logger.info(
                'begin save checkpoints {} ...'.format(checkpoint_path),
                show_type=logger.LOG_STYLE.DEFAULT,
                forground=logger.LOG_FRONT_COLOR.RED,
                background=logger.LOG_BACK_COLOR.DEFAULT)
            net.save_params(filename=checkpoint_path)
            logger.info('complete save checkpoints.',
                        show_type=logger.LOG_STYLE.DEFAULT,
                        forground=logger.LOG_FRONT_COLOR.RED,
                        background=logger.LOG_BACK_COLOR.DEFAULT)
Example #8
try:
    dw = DWOperation(meta=meta, logger=logger)

    try:
        check_sql = 'SELECT COUNT(*) FROM %s.DIM_VENDOR_RETAILER WHERE VENDOR_KEY=%s AND RETAILER_KEY=%s' % (
            schema_name, str(options.vendor_key), str(options.retailer_key))
        count = dw.query_scalar(check_sql)
        if count == 0:
            insert_sql = 'INSERT INTO %s.DIM_VENDOR_RETAILER (VENDOR_KEY, RETAILER_KEY) VALUES(%s, %s)' % (
                schema_name, str(options.vendor_key), str(
                    options.retailer_key))
            dw.execute(insert_sql)
    except pyodbc.ProgrammingError:
        logger.info("%s.DIM_VENDOR_RETAILER doesn't exist; "
                    "run updateSQL before this update." % schema_name)
    logger.info(
        'vendor_key=%s, retailer_key=%s has been inserted into %s.DIM_VENDOR_RETAILER'
        % (options.vendor_key, options.retailer_key, schema_name))

    try:
        check_sql = 'SELECT COUNT(*) FROM %s.DIM_INCIDENT_ID_START WHERE VENDOR_KEY=%s AND RETAILER_KEY=%s' % (
            schema_name, str(options.vendor_key), str(options.retailer_key))
        count = dw.query_scalar(check_sql)
        if count == 0:
            populate_incident_key_start(dw, options.vendor_key,
                                        options.retailer_key)
    except pyodbc.ProgrammingError:
        logger.info("%s.DIM_INCIDENT_ID_START doesn't exist; "
                    "run updateSQL before this update." %
Example #9
class RemoveDuplicatedRows(SparkOperator):
    '''Remove duplicated rows from data with selected columns'''

    OP_NAME = 'remove-duplicated-rows'
    OP_CATEGORY = 'data-transformation'

    def __init__(self):
        super(RemoveDuplicatedRows, self).__init__()
        self.op_input_num = 1
        self.op_output_num = 1
        self.op_status = OperatorStatus.INIT
        self.op_script_location = 'resources/spark_operators/data_transformation/remove_duplicated_rows.py'
        self.op_backend = 'spark'

        self.columns = None

    def init_operator(self, op_json_param):
        self.op_json_param = op_json_param
        self.op_input_ops = op_json_param['input-ops']
        self.op_input_ops_index = op_json_param['input-ops-index']
        self.op_running_mode = self.op_json_param.get('running-mode', 'script')
        self.op_local = bool(self.op_json_param.get('local', True))
        self.op_working_directory = self.op_json_param.get('op-working-directory')
        self.op_logger = Logger(self.op_working_directory +
                                '/log/remove-duplicated-rows_' +
                                str(self.op_json_param['op-index']))

        self.columns = op_json_param.get('columns')

    def run_function_mode(self):
        return self.op_status

    def run_script_mode(self):
        run_command = 'spark-submit --master '
        if self.op_local:
            run_command = run_command + 'local[2] '

        self.op_result.append(self.op_working_directory + 'output/' +
                              self.op_json_param['op-index'] + '-output')
        run_command = run_command + self.op_script_location + ' ' + self.op_input_ops[
            0].get_result(self.op_input_ops_index[0]
                          ) + ' ' + self.op_result[0] + ' ' + self.columns
        sub_proc = subprocess.Popen(run_command,
                                    stdout=subprocess.PIPE,
                                    shell=True)

        for line in iter(sub_proc.stdout.readline, b''):
            self.op_logger.info(line)

        sub_proc.stdout.close()
        sub_proc.wait()
        return sub_proc.returncode

    def azkaban_script(self):
        run_command = 'spark-submit --master '
        if self.op_local:
            run_command = run_command + 'local[2] '

        self.op_result.append(self.op_working_directory + 'output/' +
                              self.op_json_param['op-index'] + '-output')
        run_command = run_command + self.op_script_location + ' ' + self.op_input_ops[
            0].get_result(self.op_input_ops_index[0]
                          ) + ' ' + self.op_result[0] + ' ' + self.columns
        return run_command
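Analogous to the import operator, a hedged sketch of the parameters this operator reads. Here upstream_import_op is a hypothetical, already-initialized upstream operator (e.g. an ImportCSV instance) whose get_result(0) returns the path of its output; all other values are placeholders.

remove_dups_params = {
    'input-ops': [upstream_import_op],   # upstream operator objects
    'input-ops-index': [0],              # which output of each upstream op to consume
    'running-mode': 'script',
    'local': True,
    'op-working-directory': '/tmp/jobs/job-001/',  # placeholder path
    'op-index': '1',                     # a string; it is concatenated into paths
    'columns': 'col_a,col_b',            # columns passed to the de-duplication Spark script
}

op = RemoveDuplicatedRows()
op.init_operator(remove_dups_params)
print(op.azkaban_script())   # <script> <upstream output path> <output path> <columns>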
Example #10
class DealEngine:
    def __init__(self, stock_id, db_conn = None, redis_conn = None):
        self.stock_id = stock_id
        self.stock_name = 'Not Set'
        self.pair_queue = PairQueue(stock_id)
        self.logger = Logger('engine')
        self.db_conn = db_conn
        self.limit = 2
        self.close_price = 10
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        self.stock_name = 'sss'



        if redis_conn is None:
            self.r = redis.Redis()
        else:
            self.r = redis_conn
        cursor = self.db_conn.cursor()
        try:
            cursor.execute('select stock_name from stock_set where stock_id=%s',
                           [str(self.stock_id)])
            result = cursor.fetchall()[0]
            self.stock_name = result[0]
            cursor.execute('select gains,decline,status,close_price from stock_state where stock_id=%s',[str(self.stock_id)])
            result = cursor.fetchall()
            self.limit = result[0][0]
            self.gains = self.limit
            self.decline = result[0][1]
            status = int(result[0][2])
            self.close_price = float(result[0][3])
            self.last_price = self.close_price
            self.exist = True
            self.r.hset(stock_id, 'engine_exist', True)
            if status == 1:
                self.redis_init(True,True,self.gains,self.decline,self.close_price)
                self.on = True
                print(str(stock_id) + " is running")
            else:
                self.redis_init(False,True,self.gains,self.decline,self.close_price)
                self.on = False
                print(str(stock_id) + " is pending")
            # self.last_price = float(self.r.hget(self.stock_id,'newest_price').decode('utf-8'))

            # self.close_price = self.last_price
            # if self.close_price == 0:
            #     self.close_price = 10
        except Exception as e:
            self.redis_init(False,False,0,0,0)
            self.close_price = 0
            self.on = False
            self.last_price = 0
            print( str(stock_id) +" fails: " + str(e))
            self.exist = False
        finally:
            self.set_open_price()
            # cursor.execute('insert into today_stock (stock_id,stock_name,price,date) values (%s,%s,%s,%s)',
            #                [self.stock_id, self.stock_name, self.close_price, now])
            self.db_conn.commit()
            cursor.close()


        # self.last_price = 4
        # self

    def redis_init(self,status,engine_exists,gains,decline,close_price):
        mapping = {
            'stock_id': self.stock_id,
            'stock_name': self.stock_name,
            'status': status,
            'last_price': close_price,
            'newest_price': close_price,
            'newest': close_price,
            'gains': gains,
            'decline': decline,
            'engine_exist':engine_exists,
            'long_count':0,
            'short_count':0
        }
        self.r.hmset(self.stock_id, mapping)

    def set_open_price(self):
        cursor = self.db_conn.cursor()
        cursor.execute("select max(date) from previous_stock where stock_id = %s",[self.stock_id])
        last_date = cursor.fetchall()[0][0]
        print(last_date)
        cursor.execute("select end_price from previous_stock where stock_id=%s and date=%s",[self.stock_id,last_date])
        price = float(cursor.fetchall()[0][0])
        self.r.hset(self.stock_id,'newest_price',price)
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        cursor.execute('insert into today_stock (stock_id,stock_name,price,date) values (%s,%s,%s,%s)',[self.stock_id,self.stock_name,price,now])

    def is_exist(self):
        return self.exist
    def on_trading(self):
        if str(self.stock_id) == '101':
            return True
        status = self.r.hget(self.stock_id, 'status').decode('utf-8')
        status = (status == 'True')  # the hash stores the boolean as the string 'True'/'False'
        if self.on is True and status is False:
            print(str(self.stock_id) + ' is stopped')
        elif self.on is False and status is True:
            print(str(self.stock_id) + ' is started')
        self.on = status
        if not self.exist:
            return False

        return self.on

        # if self.on:
        #     return True
        # else:
        #
        #     # time.sleep(1)
        #     cursor = self.db_conn.cursor()
        #     cursor.execute('select status from stock_state where stock_id=%s',[self.stock_id])
        #     status = int(cursor.fetchall()[0][0])
        #     if status ==1 :
        #         self.on = status
        #         return True
        #     else:
        #         return False

    def on_stop(self):
        self.on = False
        self.r.hset(self.stock_id, 'status', False)

    def on_resume(self):
        self.on = True
        self.r.hset(self.stock_id, 'status', True)

    def on_close(self):
        self.on = False
        self.exist = False
        self.r.hset(self.stock_id, 'engine_exist', False)
        self.r.hset(self.stock_id, 'status', False)

    def save_deal(self, result,buy_order,sell_order):
        cursor = self.db_conn.cursor()
        # result.price = 2
        # result.buy_id = 1
        # result.sell_id = 1
        print('-----------------------')
        buy_fund_id = 'F' + str(result.buy_id)
        sell_fund_id = 'F' + str(result.sell_id)
        cursor.execute('select security_account from fund_account_user where username=%s',(sell_fund_id,))
        sell_id = cursor.fetchall()[0][0]
        cursor.execute('select security_account from fund_account_user where username=%s', (buy_fund_id,))
        buy_id = cursor.fetchall()[0][0]
        cursor.execute('select enabled_money from fund_account_user where username=%s', [buy_fund_id])
        buy_money = float(cursor.fetchall()[0][0])
        cursor.execute('select enabled_money from fund_account_user where username=%s', [sell_fund_id])
        sell_money = float(cursor.fetchall()[0][0])
        buy_money -= result.volume * result.price
        sell_money += result.volume * result.price
        if buy_money < 0:
            return
        cursor.execute('select freezing_amount from security_in_account where username=%s', [sell_id])
        sell_freezing_security = int(cursor.fetchall()[0][0])
        sell_freezing_security -= result.volume

        #---------------------------------------------------------------------------------------------------------------
        # change security
        cursor.execute('select amount from security_in_account where username=%s', [sell_id])
        sell_security = int(cursor.fetchall()[0][0])
        sell_security -= result.volume
        sql = "update security_in_account set amount = %s, freezing_amount = %s  where username = '******'" % (sell_security,sell_freezing_security,str(sell_id))
        cursor.execute(sql)
        # quantity = result.price * result.volume

        cursor.execute('select amount from security_in_account where username=%s', [buy_id])
        result_c = cursor.fetchall()
        if result_c:
            buy_security = int(result_c[0][0])
        else:
            buy_security = 0
        buy_security += result.volume
        if result_c:
            cursor.execute('update security_in_account set amount = %s where username = %s',
                       [buy_security, buy_id])
        else:
            cursor.execute('insert into security_in_account (username, security_number,security_name ,amount,total_price,freezing_amount ) values (%s,%s,%s,%s,%s,%s)',
                           [buy_id,self.stock_id,self.stock_name,buy_security,1000,0])

        #---------------------------------------------------------------------------------------------------------------
        cursor.execute('select freezing_money from fund_account_user where username=%s', [buy_fund_id])
        buy_freezing_money = float(cursor.fetchall()[0][0])
        buy_freezing_money -= float(buy_order.get_price()) * result.volume

        cursor.execute('select freezing_money from fund_account_user where username=%s', [sell_fund_id])
        sell_freezing_money = float(cursor.fetchall()[0][0])
        sell_freezing_money -= float(sell_order.get_price()) * result.volume
        cursor.execute('update fund_account_user set enabled_money=%s, freezing_money = %s where username=%s',[buy_money,buy_freezing_money,buy_fund_id])
        cursor.execute('update fund_account_user set enabled_money=%s, freezing_money = %s where username=%s', [sell_money, sell_freezing_money, sell_fund_id])
        # cursor.execute('update fund_account_user set enabled_money=%s where username=%s',[sell_money,result.sell_id])
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        cursor.execute('insert into today_stock (stock_id,stock_name,price,date) values (%s,%s,%s,%s)',[self.stock_id,self.stock_name,result.price,now])
        cursor.execute('insert into trade_log (stock_id,buy_id,sell_id,price,volume) values (%s,%s,%s,%s,%s) ',[result.stock_id,buy_fund_id,sell_fund_id,int(result.price),result.volume])

        self.db_conn.commit()
        cursor.close()
        self.r.hset(result.stock_id,'last_price',result.price)
        self.r.hset(result.stock_id,'newest_price',result.price)
        self.r.hset(result.stock_id,'newest',result.price)
        print("deal finished")

    def deal(self):
        long_order, short_order = self.pair_queue.get_first_order()

        if not long_order:
            # self.logger.info("No Order Now")
            return False

        if not short_order:
            # self.pair_queue.push(long_order)
            # self.logger.info("No Order Now")
            return False
        a = ctypes.c_double(100000.0)
        print('+++')
        print(self.close_price)
        b = ctypes.c_double(self.close_price)
        dll = ctypes.CDLL('./deal/libdeal.so')
        dll.Deal.restype = exchange
        dll.Deal.argtypes = [
            stock,stock, ctypes.c_double, ctypes.c_double
        ]
        # converted_long_order = order_conversion(long_order)
        # converted_short_order = order_conversion(short_order)
        # # print(converted_long_order.buy_id)
        # result = dll.Deal(converted_long_order, converted_short_order, a, b)
        # print(result.volume)
        # self.save_deal(result, long_order, short_order)
        # re_order = regenerate_order(result, long_order, short_order)
        # if re_order:
        #     self.pair_queue.push(re_order)
        # else:
        #     pass
        # self.logger.info("Success")
        try:
            converted_long_order = order_conversion(long_order)
            converted_short_order = order_conversion(short_order)
            print(long_order.volume)
            result = dll.Deal(converted_long_order,converted_short_order,a,b)
            if int(result.volume) == 0:
                raise ctypes.ArgumentError('deal fail')
            print("finish")
            print(result.volume)
            self.save_deal(result,long_order,short_order)
            re_order = regenerate_order(result,long_order,short_order)
            if re_order:
                print("re_order")
                print(re_order)
                self.pair_queue.push(re_order)
            else:
                pass
            self.pair_queue.remove(long_order.id,LONG)
            self.pair_queue.remove(short_order.id,SHORT)
            self.logger.info("Success")
        except ctypes.ArgumentError as e:
            print("in except: " + str(e))
            # self.pair_queue.push(long_order)
            # self.pair_queue.push(short_order)

        # converted_long_order = order_conversion(long_order)
        # converted_short_order = order_conversion(short_order)
        #
        # # dll = ctypes.CDLL('./deal/libdeal.so')
        #
        # a = ctypes.c_double(self.limit)
        # b = ctypes.c_double(self.last_price)
        # # dll.Deal.restype = exchange
        # result = dll.Deal(converted_long_order, converted_short_order,a,b)
        # self.save_deal(result,long_order,short_order)
        # re_order = regenerate_order(result=result)
        # # self.pair_queue.push(re_order)
        self.logger.info("Success")
        return True
    def single_run(self):
        self.deal()

    def run(self):
        while True:
            if str(self.stock_id) == '101':
                print('101' + str(self.on_trading()))
            if self.on_trading():
                self.deal()
            else:
                pass
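A minimal, hedged sketch of how DealEngine appears to be driven. The %s-style placeholders passed as lists suggest a MySQL-style DB-API driver, so pymysql is assumed here; all connection details are placeholders, and PairQueue, Logger, and the libdeal.so library must come from this project.

import pymysql   # assumed driver; any DB-API connection using %s placeholders should fit
import redis

db_conn = pymysql.connect(host='localhost', user='trade',
                          password='***', database='exchange')   # placeholder credentials
redis_conn = redis.Redis()

engine = DealEngine(stock_id=101, db_conn=db_conn, redis_conn=redis_conn)
if engine.is_exist():
    engine.single_run()   # match one pair of orders; engine.run() loops while trading is on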
Example #11
                target="console",
                vendor_key=options.vendor_key,
                retailer_key=options.retailer_key)
dw = DWOperation(meta=meta, logger=logger)

try:
    for s in schemas:
        dw.execute('''
CREATE SCHEMA IF NOT EXISTS %(schema_name)s DEFAULT INCLUDE SCHEMA PRIVILEGES;
GRANT USAGE ON SCHEMA %(schema_name)s TO read_internalusers;
GRANT SELECT ON SCHEMA %(schema_name)s TO read_internalusers;
GRANT USAGE ON SCHEMA %(schema_name)s TO write_internalusers;
GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE ON SCHEMA %(schema_name)s TO write_internalusers;
        ''' % {'schema_name': s})
        logger.info(
            '%s is created, and permission has been granted for read_internalusers and write_internalusers.'
            % s)

        for env in ['uat', 'pp', 'Prod']:
            try:
                dw.execute('''
GRANT USAGE ON SCHEMA %(schema_name)s TO %(env)sIRISolapvtc;
GRANT SELECT ON SCHEMA %(schema_name)s TO %(env)sIRISolapvtc;
                ''' % {
                    'schema_name': s,
                    'env': env
                })
                logger.info(
                    '%(schema_name)s permission has been granted for %(env)sIRISolapvtc.'
                    % {
                        'schema_name': s,
Example #12
                            init_flag=True,
                            logger=logger)
        run_init.main_process()

    except Exception as msg:
        logger.warning(msg)
        raise

    finally:
        pass


if __name__ == '__main__':
    try:
        logger.info(
            "Trying to sync feedback data from RDP for vendor:{0} and retailer:{1}"
            .format(options.vendor_key, options.retailer_key))

        # getting meta
        # SEP = os.path.sep
        # cwd = os.path.dirname(os.path.realpath(__file__))
        # generic_main_file = cwd + SEP + '..' + SEP + '..' + SEP + '..' + SEP + 'script' + SEP + 'main.py'
        # CONFIG_FILE = cwd + SEP + '..' + SEP + '..' + SEP + '..' + SEP + 'config' + SEP + 'config.properties'
        # exec(open(generic_main_file).read())
        # trigger_init_feedback(meta=meta, vendor_key=684, retailer_key=158, debug='Y')  # testing

        trigger_init_feedback(meta=meta,
                              vendor_key=options.vendor_key,
                              retailer_key=options.retailer_key)

    finally:
Example #13
class MasterHandler(RequestHandler, ABC):

    executor = ThreadPoolExecutor(MAX_EXECUTOR)

    def initialize(self, meta):
        self.meta = meta
        self.logger = Logger(log_level="info", vendor_key=-1, retailer_key=-1)
        self.service_name = None
        self.is_sync_service = False
        self.is_notifiable = True

    def prepare(self):
        self.set_header('Content-Type', 'application/json')
        self.logger.info("Accepted query arguments: %s" %
                         self.request.query_arguments)
        self.query_arguments = {
            k: self.get_query_argument(k)
            for k in self.request.query_arguments
        }
        self.logger.info("Ajusted query arguments: %s" % self.query_arguments)
        self.request_body = {}
        if 'Content-Type' in self.request.headers and self.request.headers[
                "Content-Type"].startswith("application/json"):
            self.request_body = json.loads(
                self.request.body.decode('utf-8'))  # it may get list
            self.logger.info("Accepted post: %s" % self.request_body)

        osa_backend_base_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..", "..", "..")
        service_config = os.path.join(osa_backend_base_dir, 'common', 'config',
                                      'services.json')
        with open(service_config) as f:
            services = json.load(f)

        self.set_service_properties()
        if not self.service_name:
            raise ValueError('service_name is empty.')
        elif self.service_name not in services:
            raise ValueError('%s is not found.' % self.service_name)

        self.logger.set_keys(module_name=self.service_name)
        self.service = services[self.service_name]
        module_name = self.service['module_name']
        class_name = self.service['class_name']
        module = __import__(module_name, fromlist=[class_name])
        self.klass = getattr(module, class_name)

    @run_on_executor
    @Notifier
    def async_main(self, params):
        self.cls = self.klass(meta=copy.deepcopy(self.meta),
                              request_body=copy.deepcopy(params),
                              logger=self.logger)  # init class_name
        return eval('self.cls.%s' % self.service['class_main_func']
                    )  # call main func and return result

    @Notifier
    def sync_main(self, params):
        self.cls = self.klass(meta=copy.deepcopy(self.meta),
                              request_body=copy.deepcopy(params),
                              logger=self.logger)
        return eval('self.cls.%s' % self.service['class_main_func'])

    def post(self):
        if self.is_sync_service:
            msg = self.sync_main(self.request_body)
            self.send_response(msg)
        else:
            self.async_main(self.request_body)
            msg = '%s - Running %s...' % (datetime.now(), self.service_name)
            self.send_response(msg)

    def send_response(self, msg):
        job_params = {}
        if self.is_notifiable:
            job_required_keys = [
                'jobId', 'stepId', 'batchId', 'retry', 'groupName'
            ]
            job_params = dict([(k, self.request_body[k])
                               for k in job_required_keys
                               if k in self.request_body])
            job_params['status'] = StepStatus.RUNNING.value
        job_params['message'] = msg
        job_params['request_body'] = self.request_body
        self.write(json.dumps(job_params))
        self.flush()

    @abstractmethod
    def set_service_properties(self):
        '''
        [Mandatory] set self.service_name
        [Optional] call self.set_as_sync_service() to set self.is_sync_service = True
        [Optional] call self.set_as_not_notifiable() to set self.is_notifiable = False
        '''
        pass

    def set_as_sync_service(self):
        self.is_sync_service = True

    def set_as_not_notifiable(self):
        self.is_notifiable = False
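A hedged sketch of a concrete handler following the contract in the set_service_properties docstring. The name 'test_service' mirrors the one used in Example #2 and would have to exist as a key in common/config/services.json; everything else is an assumption.

class TestServiceHandler(MasterHandler):
    """Hypothetical concrete handler built on MasterHandler."""

    def set_service_properties(self):
        # [Mandatory] must be a key in common/config/services.json
        self.service_name = 'test_service'
        # [Optional] run the service inline in post() instead of on the executor
        self.set_as_sync_service()
        # [Optional] uncomment to skip the job-status notification payload
        # self.set_as_not_notifiable()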