Exemple #1
0
def _add_percentile_features(requested, percentile_range):
    """Add percentile-based features for each requested GE/PA feature set.

    Several feature sets may be requested at once, so each group is checked
    and processed independently, in the fixed order GE_N, GE_B, PA_N, PA_B.

    Args:
        requested: set of feature names taken from ``args.features``.
        percentile_range: ``[start, stop, step]`` integers that are passed
            to ``range()`` to enumerate the percentiles.

    Raises:
        ValueError: if ``percentile_range`` is empty or does not hold
            exactly three values.
    """
    if not percentile_range:
        raise ValueError(
            'percentile range is empty. Please set percentile range.')
    if len(percentile_range) != 3:
        raise ValueError(
            'percentile range must be "start,stop,step", got: {}'.format(
                percentile_range))
    start, stop, step = percentile_range

    # (label printed, feature-name set, gp_type, is_top)
    groups = (
        ('GE_N', settings.FN_GE_N, 'g', True),
        ('GE_B', settings.FN_GE_B, 'g', False),
        ('PA_N', settings.FN_PA_N, 'p', True),
        ('PA_B', settings.FN_PA_B, 'p', False),
    )
    for label, names, gp_type, is_top in groups:
        if not requested & names:
            continue
        print(label)
        feature_set_name = next(iter(names))
        for percentile in range(start, stop, step):
            add_feature_by_percentile(gp_type=gp_type,
                                      feature_set_name=feature_set_name,
                                      percentile=percentile,
                                      is_top=is_top)


def main(argv):
    """Configure an experiment from command-line arguments and run it.

    The options are parsed with the module-level ``parser`` and copied into
    a ``Configuration`` object. Depending on the options this then assigns
    feature groups, adds percentile / GE&PA-combination features, builds
    feature vectors, and/or runs the reduced-genes validation loop.

    Args:
        argv: full argument vector; ``argv[0]`` is skipped and ``argv[1:]``
            is handed to the parser.

    Raises:
        ValueError: if ``--neg_class_mode`` holds an unknown value, or if a
            percentile-based feature is requested without a valid
            ``start,stop,step`` percentile range.
    """
    exp_setting = Configuration()
    debug_mode = list()
    reduced_mode = False
    seq_type = None
    feature_set = list()
    percentile_range = list()

    # Check for "no arguments" before parsing so that the help text is shown
    # instead of a parse error. argparse's help action normally exits the
    # process; the return is a fallback in case it does not.
    if len(argv) <= 1:
        parser.parse_args(['--help'])
        return
    args = parser.parse_args(argv[1:])

    # Set and show the version.
    exp_setting.set_version(settings.DEV_VERSION)
    print('Version:', exp_setting.get_version().get_version())

    # Load cutoffs.
    cutoffs = Cutoffs()
    cutoffs.query_cutoffs('95, 0, -5')
    exp_setting.set_cutoffs(cutoffs)
    print('Cutoffs data Initialized.')

    # Debugging mode: also switches feature-vector building to reduced mode.
    if args.enable_debug and set(args.enable_debug) & enable_debug:
        debug_mode = [1, 1000]
        reduced_mode = True

    # Database selection: anything other than "yes" switches to the test DB.
    if args.use_real_db:
        if set(args.use_real_db) & choice_yes:
            print('USING TEST DB: NO (USEING REAL/PRODUCTION DB)')
        else:
            settings.conn_string = settings.conn_string_test
            print('USING TEST DB: YES')

    # NOTE(review): test_mode and ignore_zero are read without a guard, so
    # the parser presumably gives them iterable defaults - confirm.
    is_test_mode = bool(set(args.test_mode) & choice_yes)
    exp_setting.set_test_mode(is_test_mode)
    print('TEST MODE: YES' if is_test_mode else 'TEST MODE: NO')

    # Ignore zero values.
    ignore_zero = bool(set(args.ignore_zero) & choice_yes)
    exp_setting.set_ignore_null(ignore_zero)
    print('Ignore zero values: YES' if ignore_zero
          else 'Ignore zero values: NO')

    # Gene info loading mode.
    if args.gene_load_mode:
        if set(args.gene_load_mode) & gene_load_mode_pl:
            exp_setting.set_gene_load_mode(settings.GN_LD_MODE_PL)
            print('Gene loading mode: pre-load')
        elif set(args.gene_load_mode) & gene_load_mode_dl:
            exp_setting.set_gene_load_mode(settings.GN_LD_MODE_DL)
            print('Gene loading mode: dynamic load')

    # Sequence type. If an option is given but matches none of the known
    # sets, seq_type stays None and is stored as such.
    if args.seq_type:
        if set(args.seq_type) & seq_type_pep:
            seq_type = 'p'
            print('sequence type: amino acid (peptide)')
        elif set(args.seq_type) & seq_type_dna:
            seq_type = 'd'
            print('sequence type: DNA')
        elif set(args.seq_type) & seq_type_pmt:
            seq_type = 'm1'
            print('sequence type: Promoter data')
            # set missing gnids in promoter data
            exp_setting.set_missing_gnids_in_promoter()
        elif set(args.seq_type) & seq_type_rda:
            # Reduced alphabet is stored as peptide type plus a global flag.
            seq_type = 'p'
            print('sequence type: Reduced Alphabet')
            settings.RA_MODE = True
    else:  # default
        seq_type = 'p'
        print('sequence type: amino acid (peptide) - Default')
    exp_setting.set_seq_type(seq_type)

    # gp_type ('g' unless another known value is requested).
    gp_type = 'g'
    if args.gp_type:
        if set(args.gp_type) & gp_type_g:
            gp_type = 'g'
            print('gp type: g')
        elif set(args.gp_type) & gp_type_p:
            gp_type = 'p'
            print('gp type: p')
        elif set(args.gp_type) & gp_type_b:
            gp_type = 'b'
            print('gp type: b')
        else:
            gp_type = 'g'
            print('gp type: g (default)')
    exp_setting.set_gp_type(gp_type)

    # Assign feature groups; several may be requested at the same time.
    if args.feature_group:
        feature_groups = set(args.feature_group)
        if feature_groups & feature_group_gl:
            print('new feature group: gene low expressed')
            Features.gene_low_exp()
        if feature_groups & feature_group_gh:
            print('new feature group: gene high expressed, top 5%')
            Features.gene_high_exp()
        if feature_groups & feature_group_gh10:
            print('new feature group: gene high expressed, top 10%')
            Features.gene_high_exp_t10()
        if feature_groups & feature_group_gt:
            print('new feature group: gene for each tissue, top 10%')
            Features.gene_tissues()

    # Feature set ids to build vectors for.
    if args.feature_set:
        feature_set = args.feature_set
        print('feature set: {}'.format(feature_set))

    # Negative class mode.
    if args.neg_class_mode:
        neg_class_mode = args.neg_class_mode
        print('NEG_CLASS_MODE:', neg_class_mode)
        if neg_class_mode not in (settings.NEG_CLASS_MODE_NOT_P,
                                  settings.NEG_CLASS_MODE_RND_S,
                                  settings.NEG_CLASS_MODE_RND_M):
            # Build a real string: a comma-separated expression here would
            # create a tuple and produce an unreadable exception message.
            raise ValueError(
                'NEG_CLASS_MODE: {} is UNKNOWN.'.format(neg_class_mode))
        exp_setting.set_neg_class_mode(neg_class_mode)

    # Percentile range for new feature sets: "start,stop,step" -> ints.
    if args.percentile:
        percentile_range = [int(x) for x in args.percentile.split(',')]
        print('Set percnetile range:', args.percentile)

    # GE&PA combination configurations, each given as a ':'-separated string.
    feature_set_gp_comb = list()
    if args.multi_gp:
        for conf in args.multi_gp:
            print(conf)
            feature_set_gp_comb.append(conf.split(':'))
        print(feature_set_gp_comb)

    # Class assignment for features.
    if args.features:
        requested = set(args.features)
        if requested & (settings.FN_GE_N | settings.FN_GE_B
                        | settings.FN_PA_N | settings.FN_PA_B):
            _add_percentile_features(requested, percentile_range)

        if requested & settings.FN_GPCB:
            print('GE&PA Combination data')
            for conf in feature_set_gp_comb:
                add_feature_gp_comb(conf, exp_setting)

    # Exclusive upper bound for the window start offsets used below.
    # NOTE(review): presumably tied to the number of genes - confirm.
    scan_stop = 39324

    # Build feature vectors.
    if args.feature_vector:
        intervals = [1000]
        if set(args.feature_vector) & feature_vector:
            print('build feature vector')
            fs_set_idx = 0
            if reduced_mode:
                # Reduced mode: build one vector per [start, interval] window.
                for interval in intervals:
                    for i in range(1, scan_stop, interval):
                        debug_mode = [i, interval]
                        exp_setting.set_debug_mode(debug_mode)
                        build_feature_vector(exp_setting)
            else:
                for k in range(3, 8):
                    exp_setting.set_kmer_size(kmer_size=k)
                    exp_setting.set_genes_info(genes_info=None)
                    for fsid in feature_set:
                        # Version Info
                        print('Version:', settings.DEV_VERSION)

                        if fsid == 0:
                            # fsid 0: dummy feature info for a small set of
                            # genes assigned at random.
                            fs_info = FeatureInfo(fsid=0,
                                                  fs_name='SM_RND',
                                                  gp_type='g',
                                                  class_size=2)
                            # Each fsid-0 pass gets the next block of 23
                            # limits (10, 20, ..., 230, then 240, ...).
                            assigned_genes_limit = [
                                int((x + 23 * fs_set_idx) * 10)
                                for x in range(1, 24)
                            ]
                            exp_setting.set_assigned_genes_limit(
                                assigned_genes_limit)
                            fs_set_idx += 1
                        else:
                            # Get feature set info from the DB.
                            # NOTE(review): `(fsid)` is just `fsid`, not a
                            # 1-tuple; if select_data expects a parameter
                            # sequence this should be `(fsid,)` - confirm.
                            res_fs_info = Pgsql.Common.select_data(
                                sqls.get_feature_set, (fsid))
                            fs_info = FeatureInfo(
                                fsid=fsid,
                                fs_name=res_fs_info[0][0].strip(),
                                gp_type=res_fs_info[0][1].strip(),
                                class_size=int(res_fs_info[0][2]))
                        exp_setting.set_fs_info(fs_info)

                        # for test
                        print(
                            '### MESSAGE ### fsid: {}, fs_name: {}, gp_type: {}, class_size: {}'
                            .format(exp_setting.get_fsid(),
                                    exp_setting.get_fs_name(),
                                    exp_setting.get_gp_type(),
                                    exp_setting.get_class_size()))

                        debug_mode = [1, 0]
                        exp_setting.set_debug_mode(debug_mode)
                        build_feature_vector(exp_setting)

    # Single step classification.
    if args.validation_mode:
        if set(args.validation_mode) & validation_mode_rg:
            # Reduced gene model.
            intervals = [1000, 2000, 3000, 4000, 5000]
            print('validation - reduced genes model mode')

            for interval in intervals:
                for i in range(1, scan_stop, interval):
                    debug_mode = [i, interval]
                    exp_setting.set_debug_mode(debug_mode)
                    # Same call form as the feature-vector section above:
                    # the window is carried by exp_setting, not by keywords.
                    build_feature_vector(exp_setting)