Ejemplo n.º 1
0
 def __init__(self, args, unknown_args):
     """Set up logging, open the input data and prepare result storage.

     Args:
         args: parsed command-line namespace. This method reads its
             ``verbosity``, ``output``, ``cmd``, ``data``,
             ``missing_unlisted``, ``limit``, ``append`` and
             ``replicates`` attributes.
         unknown_args: extra command-line arguments; stored unchanged on
             ``self.unknown_args``.
     """
     self.args = args
     self.unknown_args = unknown_args
     self.option = checkInput(args)
     # Install the shared logger. The log file name is derived from the
     # first output name with its extension stripped.
     # Compare verbosity by value: ``is not 0`` tests object identity and
     # only works by accident of CPython's small-integer cache.
     env.logger = getLogger(max(min(args.verbosity - 1, 2), 0),
                            fn=os.path.splitext(args.output[0])[0],
                            fv=2 if args.verbosity != 0 else 0)
     # Record the command line in the debug log, framed by "=" rulers
     # (ruler length is capped at 100 characters).
     env.logger.debug('\n{0}\n{1}\n{0}'.format(
         "=" * min(len(args.cmd), 100), args.cmd))
     # At verbosity 1 messages go through printinfo instead of the logger.
     self.logger = env.logger.info if args.verbosity != 1 else printinfo
     #
     self.logger('Loading data from [{}] ...'.format(args.data))
     # option == 1 selects the SFS reader; any other value the GFile reader.
     if self.option == 1:
         self.file = SFSFile(args.data)
     else:
         self.file = GFile(args.data)
     self.groups = self.file.getnames()
     self.logger('{:,d} units found'.format(len(self.groups)))
     # Load non-missing data, used to annotate for each variant position
     # whether or not it is missing from association analysis.
     # It is named chip_file because it mimics the behavior of exome chip
     # design.
     if args.missing_unlisted:
         self.chip_file = SFSFile(args.missing_unlisted)
     else:
         self.chip_file = None
     # Number of units to analyze: a requested limit is clamped to
     # [1, number of units]; a falsy limit (0 / None) means "all units".
     if self.args.limit:
         self.limit = min(max(1, args.limit), len(self.groups))
         self.logger('{:,d} units will be analyzed'.format(self.limit))
     else:
         self.limit = len(self.groups)
     # Append to an existing result store only when explicitly requested.
     self.result = ResultManager(args.output,
                                 action='w' if not args.append else 'a')
     if self.args.verbosity == 1:
         # One overall progress bar that counts scanned units.
         widgets = [
             FormatLabel('scanning: unit %(value)d - '),
             Percentage(), ' ',
             Bar('>'), ' ',
             ETA()
         ]
         self.pbar = ProgressBar(widgets=widgets,
                                 maxval=self.limit,
                                 term_width=get_terminal_size()[0] -
                                 5).start()
     else:
         # Use each group's own progress bar, or no progress bar at all.
         self.pbar = ProgressBarNull()
     # Buffer that collects every input dict into a list; only allocated
     # when replicates is negative, otherwise left as None.
     self.data_buffer = [] if self.args.replicates < 0 else None
Ejemplo n.º 2
0
 def show_gdat():
     """Print a summary of the data file ``fn``.

     ``fn`` and ``unknown_args`` are read from the enclosing scope.

     * With "-v2" in ``unknown_args`` the output of the external
       ``ptdump`` command is printed.
     * Otherwise the names returned by ``GFile(fn).getnames()`` are
       printed one per line, each prefixed with "/". With "-v0" only the
       first 50 names are printed, followed by a note giving the number
       of names that were left out.

     Raises:
         ValueError: if running ``ptdump`` fails in "-v2" mode.
     """
     if '-v2' in unknown_args:
         try:
             print(runCommand('ptdump {}'.format(fn)))
         except Exception:
             # Narrowed from a bare ``except`` so that Ctrl-C and
             # interpreter exit are no longer turned into ValueError.
             raise ValueError(
                 'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                 .format(fn))
         return
     try:
         gf = GFile(fn)
         try:
             names = gf.getnames()
         finally:
             # Release the handle even when listing the names fails.
             gf.close()
     except Exception:
         # Best effort: a file that cannot be read is shown as empty.
         names = []
     brief = '-v0' in unknown_args
     for idx, name in enumerate(names):
         if brief and idx >= 50:
             # Stop *before* printing item 51 so the reported count
             # matches what was actually left out.
             printinfo(
                 '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                 % (len(names) - idx))
             break
         print('/%s' % name)
Ejemplo n.º 3
0
def showFields(fn, border, unknown_args):
    """Display the contents of a result or data file, chosen by extension.

    * ``*.csv``: with no extra arguments, print the column names that do
      not start with "_"; otherwise print the table restricted to the
      columns named in ``unknown_args`` ("*" acts as a wildcard).
    * ``*.SEQPowerDB``: with no extra arguments, list the tables. The
      first argument names a table; further arguments name fields to
      select, optionally followed by ``--condition <SQL clause ...>``.
      ``--debug`` echoes the generated query to stderr.
    * ``*.gdat`` / ``*.h5`` / ``*.hdf5``: ``--from <files...>`` copies
      groups from other files into ``fn`` with ``h5copy``;
      ``--to [dir]`` dumps every group to text files; finally the group
      names (or the ``ptdump`` output with "-v2") are printed.

    Args:
        fn: path of the file to inspect.
        border: table style selector; 'less' and 'no' pick alternative
            table styles, any other value keeps the default style.
        unknown_args: list of additional command-line tokens, interpreted
            as described above. The list is not modified.

    Raises:
        OSError: if a ``.SEQPowerDB`` file does not exist.
        ValueError: if the file type is unsupported, the requested table
            does not exist, none of the requested fields exist, or
            ``ptdump`` fails in "-v2" mode.
    """

    def set_style(pt):
        # Apply the table style requested through ``border``.
        if border == 'less':
            pt.set_style(MSWORD_FRIENDLY)
        if border == 'no':
            pt.set_style(PLAIN_COLUMNS)

    #
    if fn.endswith('.csv'):
        pt = from_csv(openFile(fn), delimiter=',')
        set_style(pt)
        # Columns whose names start with "_" are hidden.
        header = [x for x in pt.field_names if not x.startswith('_')]
        if not unknown_args:
            print('\n'.join(header))
            return
        output = []
        for item in unknown_args:
            if '*' in item:
                # "*" is translated to a lazy regex group; re.match anchors
                # only at the start, so the pattern acts as a prefix match.
                pattern = re.compile(item.replace('*', '(.*?)'))
                output.extend(x for x in header if pattern.match(x))
            else:
                output.extend(x for x in header if x == item)
        print(pt.get_string(fields=output))
    elif fn.endswith('.SEQPowerDB'):
        if not os.path.isfile(fn):
            raise OSError('Cannot find {}'.format(fn))
        rs = ResultManager(fn)
        pt = PrettyTable()
        set_style(pt)
        # show tables
        if not unknown_args:
            pt.add_column('TABLES', rs.get_tables())
            print(pt)
            return
        table = unknown_args[0]
        if table not in rs.get_tables():
            raise ValueError("Cannot find table '{}'".format(table))
        # Work on a copy so the caller's argument list is left untouched.
        db_args = list(unknown_args)
        debug = '--debug' in db_args
        if debug:
            db_args.remove('--debug')
        if '--condition' in db_args:
            split_at = db_args.index('--condition')
            fields = db_args[1:split_at]
            condition = ' '.join(db_args[split_at + 1:])
        else:
            fields = db_args[1:]
            condition = None
        # show fields; the key pushes names containing "method" / "power"
        # towards the front of the listing
        header = sorted(rs.get_fields(table),
                        key=lambda x: x.replace("_", "|").replace(
                            'method', 'AAA').replace('power', 'AAB'))
        if not fields:
            pt.add_column(table, header)
            pt.align[table] = "l"
            print(pt)
        else:
            # Only fields that really exist in the table are selected.
            names = [x for x in fields if x in header]
            if not names:
                # Fail clearly instead of sending "SELECT  from ..." to
                # the database.
                raise ValueError(
                    "None of the requested fields exist in table '{}'".
                    format(table))
            # NOTE(review): ``condition`` is spliced into the statement as
            # raw SQL typed by the user; table and field names were checked
            # against the database above. Do not feed this untrusted input.
            select_query = "SELECT {} from {} {}".format(
                ','.join(names), table, condition if condition else '')
            if debug:
                sys.stderr.write(select_query + '\n')
            pt.field_names = names
            for item in rs.cur.execute(select_query).fetchall():
                pt.add_row(item)
            print(pt)
    elif fn.split('.')[-1] in ['gdat', 'h5', 'hdf5']:
        quiet = '-v0' in unknown_args

        def show_gdat():
            # Print ptdump output ("-v2") or the list of group names.
            if '-v2' in unknown_args:
                try:
                    print(runCommand('ptdump {}'.format(fn)))
                except Exception:
                    # Narrowed from a bare ``except`` so Ctrl-C and
                    # interpreter exit are not turned into ValueError.
                    raise ValueError(
                        'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                        .format(fn))
                return
            try:
                gf = GFile(fn)
                try:
                    names = gf.getnames()
                finally:
                    # Release the handle even when listing fails.
                    gf.close()
            except Exception:
                # Best effort: an unreadable file is shown as empty.
                names = []
            for idx, name in enumerate(names):
                if quiet and idx >= 50:
                    # Stop before printing item 51 so the reported count
                    # matches what was actually left out.
                    printinfo(
                        '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                        % (len(names) - idx))
                    break
                print('/%s' % name)

        #
        if '--from' in unknown_args:
            # Every token after "--from" with a recognised extension is
            # copied into ``fn`` under a group named after the file.
            for item in unknown_args[(unknown_args.index('--from') + 1):]:
                prefix, suffix = os.path.splitext(os.path.basename(item))
                if suffix in ['.gdat', '.h5', '.hdf5']:
                    runCommand(
                        'h5copy -v -i {0} -o {1} -s "/{2}" -d "/{2}"'.format(
                            item, fn, re.sub(r'[^a-zA-Z0-9_]', '_', prefix)),
                        accepted_rc=[0, 1])
                    if not quiet:
                        printinfo('File {} processed!'.format(item))
        if '--to' in unknown_args:
            # The token after "--to" is the output directory; it defaults
            # to the input file name without its extension.
            target = unknown_args[(unknown_args.index('--to') + 1):]
            target = target[0] if target else os.path.splitext(fn)[0]
            runCommand('mkdir -p {}'.format(target))
            gf = GFile(fn)
            try:
                names = gf.getnames()
            finally:
                gf.close()
            # Iterate the names fetched while the file was open rather
            # than querying the closed handle again.
            for name in names:
                dat = GData(fn, name)
                if not quiet:
                    printinfo('Saving files {}'.format(
                        os.path.join(target, '{}.*.txt'.format(name))))
                dat.decompress()
                for key in dat:
                    np.savetxt(os.path.join(target,
                                            '{}.{}.txt'.format(name, key)),
                               dat[key],
                               fmt='%s',
                               delimiter='\t')
        show_gdat()
    else:
        raise ValueError('Unsupported file type {}'.format(fn))