Example #1
 def __init__(self, args, unknown_args):
     self.args = args
     self.unknown_args = unknown_args
     self.option = checkInput(args)
     #
     env.logger = getLogger(max(min(args.verbosity - 1, 2), 0),
                            fn=os.path.splitext(args.output[0])[0],
                            fv=2 if args.verbosity != 0 else 0)
     env.logger.debug('\n{0}\n{1}\n{0}'.format(
         "=" * min(len(args.cmd), 100), args.cmd))
     self.logger = env.logger.info if args.verbosity != 1 else printinfo
     #
     self.logger('Loading data from [{}] ...'.format(args.data))
     if self.option == 1:
         self.file = SFSFile(args.data)
     else:
         self.file = GFile(args.data)
     self.groups = self.file.getnames()
     self.logger('{:,d} units found'.format(len(self.groups)))
     # load non-missing data
     # to annotate, for each variant position, whether or not it is missing from the association analysis
     # name it chip_file because it mimics the behavior of exome chip design
     if args.missing_unlisted:
         self.chip_file = SFSFile(args.missing_unlisted)
     else:
         self.chip_file = None
     # set limit
     if self.args.limit:
         self.limit = min(max(1, args.limit), len(self.groups))
         self.logger('{:,d} units will be analyzed'.format(self.limit))
     else:
         self.limit = len(self.groups)
     self.result = ResultManager(args.output,
                                 action='w' if not args.append else 'a')
     if self.args.verbosity == 1:
         # widgets = [FormatLabel('scanning: unit %(value)d - '), BouncingBar(marker=RotatingMarker())]
         widgets = [
             FormatLabel('scanning: unit %(value)d - '),
             Percentage(), ' ',
             Bar('>'), ' ',
             ETA()
         ]
         self.pbar = ProgressBar(widgets=widgets,
                                 maxval=self.limit,
                                 term_width=get_terminal_size()[0] -
                                 5).start()
     else:
         # use each group's own progress bar, or no progress bar at all
         self.pbar = ProgressBarNull()
     # buffer object that collects all input dicts into a list
     self.data_buffer = [] if self.args.replicates < 0 else None
Example #2
 def __init__(self, args, unknown_args):
     self.args = args
     self.unknown_args = unknown_args
     self.option = checkInput(args)
     #
     env.logger = getLogger(
         max(min(args.verbosity - 1, 2), 0),
         fn=os.path.splitext(args.output[0])[0],
         fv=2 if args.verbosity != 0 else 0,
     )
     env.logger.debug("\n{0}\n{1}\n{0}".format("=" * min(len(args.cmd), 100), args.cmd))
     self.logger = env.logger.info if args.verbosity != 1 else printinfo
     #
     self.logger("Loading data from [{}] ...".format(args.data))
     if self.option == 1:
         self.file = SFSFile(args.data)
     else:
         self.file = GFile(args.data)
     self.groups = self.file.getnames()
     self.logger("{:,d} units found".format(len(self.groups)))
     # load non-missing data
     # to annotate, for each variant position, whether or not it is missing from the association analysis
     # name it chip_file because it mimics the behavior of exome chip design
     if args.missing_unlisted:
         self.chip_file = SFSFile(args.missing_unlisted)
     else:
         self.chip_file = None
     # set limit
     if self.args.limit:
         self.limit = min(max(1, args.limit), len(self.groups))
         self.logger("{:,d} units will be analyzed".format(self.limit))
     else:
         self.limit = len(self.groups)
     self.result = ResultManager(args.output, action="w" if not args.append else "a")
     if self.args.verbosity == 1:
         # widgets = [FormatLabel('scanning: unit %(value)d - '), BouncingBar(marker=RotatingMarker())]
         widgets = [FormatLabel("scanning: unit %(value)d - "), Percentage(), " ", Bar(">"), " ", ETA()]
         self.pbar = ProgressBar(widgets=widgets, maxval=self.limit, term_width=get_terminal_size()[0] - 5).start()
     else:
         # use each group's own progress bar, or no progress bar at all
         self.pbar = ProgressBarNull()
     # buffer object that collects all input dicts into a list
     self.data_buffer = [] if self.args.replicates < 0 else None
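The constructors above map the user-facing verbosity flag onto a clamped logger level with max(min(args.verbosity - 1, 2), 0) and fall back to a plain printinfo when verbosity is 1. Below is a minimal, self-contained sketch of the same clamping idea using only the standard logging module; make_logger and the 'demo' name are illustrative and not part of SEQPower's getLogger/env helpers.

import logging

def make_logger(verbosity, name='demo'):
    # Clamp verbosity - 1 into [0, 2], mirroring max(min(args.verbosity - 1, 2), 0)
    level_index = max(min(verbosity - 1, 2), 0)
    level = [logging.WARNING, logging.INFO, logging.DEBUG][level_index]
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler())
    return logger

log = make_logger(verbosity=2)
log.info('verbosity 2 maps to INFO-level console output')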
Example #3
 def show_gdat():
     if '-v2' in unknown_args:
         try:
             print(runCommand('ptdump {}'.format(fn)))
         except:
             raise ValueError('Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'.format(fn))
     else:
         try:
             gf = GFile(fn)
             names = gf.getnames()
             gf.close()
         except:
             names = []
         for idx, name in enumerate(names):
             print('/%s' % name)
             if idx >= 50 and '-v0' in unknown_args:
                 remaining = len(names) - 50
                 if remaining:
                     printinfo('%s more items not displayed. Use "-v1/-v2" switch to see more.' % remaining)
                 break
Example #4
 def show_gdat():
     if '-v2' in unknown_args:
         try:
             print(runCommand('ptdump {}'.format(fn)))
         except:
             raise ValueError(
                 'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                 .format(fn))
     else:
         try:
             gf = GFile(fn)
             names = gf.getnames()
             gf.close()
         except:
             names = []
         for idx, name in enumerate(names):
             print('/%s' % name)
             if idx >= 50 and '-v0' in unknown_args:
                 remaining = len(names) - 50
                 if remaining:
                     printinfo(
                         '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                         % remaining)
                 break
Example #5
def showFields(fn, border, unknown_args):
    def set_style(pt):
        if border == 'less':
            pt.set_style(MSWORD_FRIENDLY)
        if border == 'no':
            pt.set_style(PLAIN_COLUMNS)
    #
    if fn.endswith('.csv'):
        pt = from_csv(openFile(fn), delimiter = ',')
        set_style(pt)
        header = [x for x in pt.field_names if not x.startswith('_')]
        if len(unknown_args) == 0:
            print('\n'.join(header))
            return
        fields = [re.compile(item.replace('*', '(.*?)')) if '*' in item else item for item in unknown_args]
        output = []
        for item in fields:
            if type(item) is str:
                item = [x for x in header if x == item]
            else:
                item = [x for x in header if re.match(item, x)]
            output.extend(item)
        print(pt.get_string(fields=output))
    elif fn.endswith('.SEQPowerDB'):
        if not os.path.isfile(fn):
            raise OSError('Cannot find {}'.format(fn))
        rs = ResultManager(fn)
        pt = PrettyTable()
        set_style(pt)
        # show tables
        if len(unknown_args) == 0:
            pt.add_column('TABLES', rs.get_tables())
            print(pt)
            return
        table = unknown_args[0]
        if table not in rs.get_tables():
            raise ValueError("Cannot find table '{}'".format(table))
        if '--debug' in unknown_args:
            debug = True
            unknown_args.pop(unknown_args.index('--debug'))
        else:
            debug = False
        if '--condition' in unknown_args:
            fields = unknown_args[1:unknown_args.index('--condition')]
            condition = ' '.join(unknown_args[(unknown_args.index('--condition') + 1):])
        else:
            fields = unknown_args[1:]
            condition = None
        # show fields
        header = sorted(rs.get_fields(table),
                            key = lambda x: x.replace("_", "|").replace('method', 'AAA').replace('power', 'AAB'))
        if len(fields) == 0:
            pt.add_column(table,header)
            pt.align[table] = "l"
            print(pt)
        else:
            names = [x for x in fields if x in header]
            select_query = "SELECT {} from {} {}".format(','.join(names),
                table, condition if condition else '')
            if debug:
                sys.stderr.write(select_query + '\n')
            pt.field_names = names
            for item in rs.cur.execute(select_query).fetchall():
                pt.add_row(item)
            print(pt)
    elif fn.split('.')[-1] in ['gdat', 'h5', 'hdf5']:
        def show_gdat():
            if '-v2' in unknown_args:
                try:
                    print(runCommand('ptdump {}'.format(fn)))
                except:
                    raise ValueError('Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'.format(fn))
            else:
                try:
                    gf = GFile(fn)
                    names = gf.getnames()
                    gf.close()
                except:
                    names = []
                for idx, name in enumerate(names):
                    print('/%s' % name)
                    if idx >= 50 and '-v0' in unknown_args:
                        remaining = len(names) - 50
                        if remaining:
                            printinfo('%s more items not displayed. Use "-v1/-v2" switch to see more.' % remaining)
                        break
        #
        if '--from' in unknown_args:
            for item in unknown_args[(unknown_args.index('--from') + 1):]:
                prefix, surfix = os.path.splitext(os.path.basename(item))
                if surfix in ['.gdat', '.h5', '.hdf5']:
                    runCommand('h5copy -v -i {0} -o {1} -s "/{2}" -d "/{2}"'.format(item, fn, re.sub(r'[^a-zA-Z0-9_]', '_', prefix)), accepted_rc = [0,1])
                    if '-v0' not in unknown_args:
                        printinfo('File {} processed!'.format(item))
        if '--to' in unknown_args:
            target = unknown_args[(unknown_args.index('--to') + 1):]
            target = target[0] if target else os.path.splitext(fn)[0]
            runCommand('mkdir -p {}'.format(target))
            gf = GFile(fn)
            names = gf.getnames()
            gf.close()
            for name in names:
                dat = GData(fn, name)
                if '-v0' not in unknown_args:
                    printinfo('Saving files {}'.format(os.path.join(target, '{}.*.txt'.format(name))))
                dat.decompress()
                for key in dat:
                    np.savetxt(os.path.join(target, '{}.{}.txt'.format(name, key)), dat[key], fmt = '%s', delimiter = '\t')
        show_gdat()
    else:
        raise ValueError('Unsupported file type {}'.format(fn))
    return
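For the CSV branch above, field names given on the command line may contain '*' wildcards; each such name is compiled into a regular expression and matched against the table header. A self-contained sketch of that matching step follows; the header and requested lists are made-up illustrations, not real SEQPower output.

import re

header = ['method', 'power', 'power_median', '_internal_id']
requested = ['method', 'power*']

# '*' becomes a non-greedy regex group, exact names are kept as plain strings
fields = [re.compile(item.replace('*', '(.*?)')) if '*' in item else item
          for item in requested]
output = []
for item in fields:
    if isinstance(item, str):
        output.extend(x for x in header if x == item)
    else:
        output.extend(x for x in header if re.match(item, x))
print(output)  # ['method', 'power', 'power_median']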
Example #6
class Executor:
    def __init__(self, args, unknown_args):
        self.args = args
        self.unknown_args = unknown_args
        self.option = checkInput(args)
        #
        env.logger = getLogger(max(min(args.verbosity - 1, 2), 0), fn = os.path.splitext(args.output[0])[0],
                               fv = 2 if args.verbosity != 0 else 0)
        env.logger.debug('\n{0}\n{1}\n{0}'.format("="*min(len(args.cmd), 100), args.cmd))
        self.logger = env.logger.info if args.verbosity != 1 else printinfo
        #
        self.logger('Loading data from [{}] ...'.format(args.data))
        if self.option == 1:
            self.file = SFSFile(args.data)
        else:
            self.file = GFile(args.data)
        self.groups = self.file.getnames()
        self.logger('{:,d} units found'.format(len(self.groups)))
        # load non-missing data
        # to annotate, for each variant position, whether or not it is missing from the association analysis
        # name it chip_file because it mimics the behavior of exome chip design
        if args.missing_unlisted:
            self.chip_file = SFSFile(args.missing_unlisted)
        else:
            self.chip_file = None
        # set limit
        if self.args.limit:
            self.limit = min(max(1, args.limit), len(self.groups))
            self.logger('{:,d} units will be analyzed'.format(self.limit))
        else:
            self.limit = len(self.groups)
        self.result = ResultManager(args.output, action = 'w' if not args.append else 'a')
        if self.args.verbosity == 1:
            # widgets = [FormatLabel('scanning: unit %(value)d - '), BouncingBar(marker=RotatingMarker())]
            widgets = [FormatLabel('scanning: unit %(value)d - '), Percentage(), ' ',
                       Bar('>'), ' ', ETA()]
            self.pbar = ProgressBar(widgets=widgets, maxval = self.limit,
                                    term_width=get_terminal_size()[0] - 5).start()
        else:
            # use each group's own progress bar, or no progress bar at all
            self.pbar = ProgressBarNull()
        # buffer object that collects all input dicts into a list
        self.data_buffer = [] if self.args.replicates < 0 else None

    def run(self):
        if self.data_buffer is not None and self.option == 0:
            if self.args.resampling:
                self.logger('[WARNING] Loading all genotype data to memory. May fail if there is not enough memory!')
            else:
                self.logger('Converting data attributes ...')
        try:
            if self.option == 1:
                self.__scan_sfs()
            else:
                self.__scan_gdat()
        except:
            self.result.close(quiet = True)
            raise
        self.file.close()
        if self.chip_file is not None:
            self.chip_file.close()
        if self.data_buffer is not None:
            self.result.append(Calculator(self.args, self.unknown_args, self.data_buffer).run())
        self.result.close()
        self.pbar.finish()


    def __scan_gdat(self):
        '''scan gdat file'''
        maf = 'maf'
        pos = 'position'
        function_score = 'annotation'
        # Allow for customized key names in gdat file
        try:
            for x, y in zip(getColumn(self.args.data[:-5] + '.key', 1),
                            getColumn(self.args.data[:-5] + '.key', 2)):
                if x == 'maf':
                    maf = y
                if x == 'position':
                    pos = y
                if x == 'annotation':
                    function_score = y
        except:
            pass
        #
        for group, item in enumerate(self.groups):
            if group >= self.limit:
                break
            data = self.file.getdata(item)
            if self.args.resampling:
                data.decompress()
            else:
                data['haplotype'] = [[]]
            try:
                loci_input = {'pool':data['haplotype'], 'name':item,
                              'maf':list(data[maf]), 'pos':list(data[pos]),
                              'function_score':list(data[function_score])}
            except KeyError as e:
                env.logger.error('Column name {} not found. Please provide a [{}.key] file to override the column naming conventions.'
                                 .format(e, self.args.data[:-5]))
                continue
            loci_input['num_variants'] = len(loci_input['maf'])
            if self.chip_file:
                cdata = self.chip_file.getdata(item)
                if cdata is None or (not is_within(cdata['num_variants'], self.args.def_valid_locus)):
                    continue
                loci_input['missing'] = [False if x in cdata['pos'] else True for x in loci_input['pos']]
            else:
                loci_input['missing'] = None
            if is_within(loci_input['num_variants'], self.args.def_valid_locus):
                if self.data_buffer is None:
                    self.result.append(Calculator(self.args, self.unknown_args,loci_input).run())
                else:
                    self.data_buffer.append(loci_input)
            self.pbar.update(group + 1)

    def __scan_sfs(self):
        for group, loci_input in enumerate(self.file.data):
            if group >= self.limit:
                break
            # text sfs file does not have any haplotype pools
            loci_input['pool'] = [[]]
            if self.chip_file:
                cdata = self.chip_file.getdata(loci_input['name'])
                if cdata is None or (not is_within(cdata['num_variants'], self.args.def_valid_locus)):
                    continue
                loci_input['missing'] = [False if x in cdata['pos'] else True for x in loci_input['pos']]
                assert len(loci_input['missing']) == len(loci_input['maf'])
            else:
                loci_input['missing'] = None
            if is_within(loci_input['num_variants'], self.args.def_valid_locus):
                if self.data_buffer is None:
                    self.result.append(Calculator(self.args, self.unknown_args,loci_input).run())
                else:
                    self.data_buffer.append(loci_input)
            self.pbar.update(group + 1)
Example #7
class Executor:
    def __init__(self, args, unknown_args):
        self.args = args
        self.unknown_args = unknown_args
        self.option = checkInput(args)
        #
        env.logger = getLogger(max(min(args.verbosity - 1, 2), 0),
                               fn=os.path.splitext(args.output[0])[0],
                               fv=2 if args.verbosity != 0 else 0)
        env.logger.debug('\n{0}\n{1}\n{0}'.format(
            "=" * min(len(args.cmd), 100), args.cmd))
        self.logger = env.logger.info if args.verbosity != 1 else printinfo
        #
        self.logger('Loading data from [{}] ...'.format(args.data))
        if self.option == 1:
            self.file = SFSFile(args.data)
        else:
            self.file = GFile(args.data)
        self.groups = self.file.getnames()
        self.logger('{:,d} units found'.format(len(self.groups)))
        # load non-missing data
        # to annotate, for each variant position, whether or not it is missing from the association analysis
        # name it chip_file because it mimics the behavior of exome chip design
        if args.missing_unlisted:
            self.chip_file = SFSFile(args.missing_unlisted)
        else:
            self.chip_file = None
        # set limit
        if self.args.limit:
            self.limit = min(max(1, args.limit), len(self.groups))
            self.logger('{:,d} units will be analyzed'.format(self.limit))
        else:
            self.limit = len(self.groups)
        self.result = ResultManager(args.output,
                                    action='w' if not args.append else 'a')
        if self.args.verbosity == 1:
            # widgets = [FormatLabel('scanning: unit %(value)d - '), BouncingBar(marker=RotatingMarker())]
            widgets = [
                FormatLabel('scanning: unit %(value)d - '),
                Percentage(), ' ',
                Bar('>'), ' ',
                ETA()
            ]
            self.pbar = ProgressBar(widgets=widgets,
                                    maxval=self.limit,
                                    term_width=get_terminal_size()[0] -
                                    5).start()
        else:
            # use each group's own progress bar, or no progress bar at all
            self.pbar = ProgressBarNull()
        # buffer object that collects all input dicts into a list
        self.data_buffer = [] if self.args.replicates < 0 else None

    def run(self):
        if self.data_buffer is not None and self.option == 0:
            if self.args.resampling:
                self.logger(
                    '[WARNING] Loading all genotype data to memory. May fail if there is not enough memory!'
                )
            else:
                self.logger('Converting data attributes ...')
        try:
            if self.option == 1:
                self.__scan_sfs()
            else:
                self.__scan_gdat()
        except:
            self.result.close(quiet=True)
            raise
        self.file.close()
        if self.chip_file is not None:
            self.chip_file.close()
        if self.data_buffer is not None:
            self.result.append(
                Calculator(self.args, self.unknown_args,
                           self.data_buffer).run())
        self.result.close()
        self.pbar.finish()

    def __scan_gdat(self):
        '''scan gdat file'''
        maf = 'maf'
        pos = 'position'
        function_score = 'annotation'
        # Allow for customized key names in gdat file
        try:
            for x, y in zip(getColumn(self.args.data[:-5] + '.key', 1),
                            getColumn(self.args.data[:-5] + '.key', 2)):
                if x == 'maf':
                    maf = y
                if x == 'position':
                    pos = y
                if x == 'annotation':
                    function_score = y
        except:
            pass
        #
        for group, item in enumerate(self.groups):
            if group >= self.limit:
                break
            data = self.file.getdata(item)
            if self.args.resampling:
                data.decompress()
            else:
                data['haplotype'] = [[]]
            try:
                loci_input = {
                    'pool': data['haplotype'],
                    'name': item,
                    'maf': list(data[maf]),
                    'pos': list(data[pos]),
                    'function_score': list(data[function_score])
                }
            except KeyError as e:
                env.logger.error(
                    'Column name {} not found. Please provide a [{}.key] file to override the column naming conventions.'
                    .format(e, self.args.data[:-5]))
                continue
            loci_input['num_variants'] = len(loci_input['maf'])
            if self.chip_file:
                cdata = self.chip_file.getdata(item)
                if cdata is None or (not is_within(cdata['num_variants'],
                                                   self.args.def_valid_locus)):
                    continue
                loci_input['missing'] = [
                    False if x in cdata['pos'] else True
                    for x in loci_input['pos']
                ]
            else:
                loci_input['missing'] = None
            if is_within(loci_input['num_variants'],
                         self.args.def_valid_locus):
                if self.data_buffer is None:
                    self.result.append(
                        Calculator(self.args, self.unknown_args,
                                   loci_input).run())
                else:
                    self.data_buffer.append(loci_input)
            self.pbar.update(group + 1)

    def __scan_sfs(self):
        for group, loci_input in enumerate(self.file.data):
            if group >= self.limit:
                break
            # text sfs file does not have any haplotype pools
            loci_input['pool'] = [[]]
            if self.chip_file:
                cdata = self.chip_file.getdata(loci_input['name'])
                if cdata is None or (not is_within(cdata['num_variants'],
                                                   self.args.def_valid_locus)):
                    continue
                loci_input['missing'] = [
                    False if x in cdata['pos'] else True
                    for x in loci_input['pos']
                ]
                assert len(loci_input['missing']) == len(loci_input['maf'])
            else:
                loci_input['missing'] = None
            if is_within(loci_input['num_variants'],
                         self.args.def_valid_locus):
                if self.data_buffer is None:
                    self.result.append(
                        Calculator(self.args, self.unknown_args,
                                   loci_input).run())
                else:
                    self.data_buffer.append(loci_input)
            self.pbar.update(group + 1)
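Executor either streams one result per unit into the ResultManager or, when args.replicates is negative, buffers every unit's input and runs the Calculator once over the whole list. A self-contained sketch of that buffering pattern follows; scan, process, and write are hypothetical stand-ins for the scan loops, Calculator(...).run(), and ResultManager.append().

def scan(units, replicates, process, write):
    buffer = [] if replicates < 0 else None   # mirrors self.data_buffer
    for unit in units:
        if buffer is None:
            write(process([unit]))   # stream: one result per unit
        else:
            buffer.append(unit)      # buffer: defer computation
    if buffer is not None:
        write(process(buffer))       # single pass over everything at the end

results = []
scan(range(3), replicates=-1, process=sum, write=results.append)
print(results)  # [3]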
Example #8
def showFields(fn, border, unknown_args):
    def set_style(pt):
        if border == 'less':
            pt.set_style(MSWORD_FRIENDLY)
        if border == 'no':
            pt.set_style(PLAIN_COLUMNS)

    #
    if fn.endswith('.csv'):
        pt = from_csv(openFile(fn), delimiter=',')
        set_style(pt)
        header = [x for x in pt.field_names if not x.startswith('_')]
        if len(unknown_args) == 0:
            print('\n'.join(header))
            return
        fields = [
            re.compile(item.replace('*', '(.*?)')) if '*' in item else item
            for item in unknown_args
        ]
        output = []
        for item in fields:
            if type(item) is str:
                item = [x for x in header if x == item]
            else:
                item = [x for x in header if re.match(item, x)]
            output.extend(item)
        print(pt.get_string(fields=output))
    elif fn.endswith('.SEQPowerDB'):
        if not os.path.isfile(fn):
            raise OSError('Cannot find {}'.format(fn))
        rs = ResultManager(fn)
        pt = PrettyTable()
        set_style(pt)
        # show tables
        if len(unknown_args) == 0:
            pt.add_column('TABLES', rs.get_tables())
            print(pt)
            return
        table = unknown_args[0]
        if table not in rs.get_tables():
            raise ValueError("Cannot find table '{}'".format(table))
        if '--debug' in unknown_args:
            debug = True
            unknown_args.pop(unknown_args.index('--debug'))
        else:
            debug = False
        if '--condition' in unknown_args:
            fields = unknown_args[1:unknown_args.index('--condition')]
            condition = ' '.join(
                unknown_args[(unknown_args.index('--condition') + 1):])
        else:
            fields = unknown_args[1:]
            condition = None
        # show fields
        header = sorted(rs.get_fields(table),
                        key=lambda x: x.replace("_", "|").replace(
                            'method', 'AAA').replace('power', 'AAB'))
        if len(fields) == 0:
            pt.add_column(table, header)
            pt.align[table] = "l"
            print(pt)
        else:
            names = [x for x in fields if x in header]
            select_query = "SELECT {} from {} {}".format(
                ','.join(names), table, condition if condition else '')
            if debug:
                sys.stderr.write(select_query + '\n')
            pt.field_names = names
            for item in rs.cur.execute(select_query).fetchall():
                pt.add_row(item)
            print(pt)
    elif fn.split('.')[-1] in ['gdat', 'h5', 'hdf5']:

        def show_gdat():
            if '-v2' in unknown_args:
                try:
                    print(runCommand('ptdump {}'.format(fn)))
                except:
                    raise ValueError(
                        'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                        .format(fn))
            else:
                try:
                    gf = GFile(fn)
                    names = gf.getnames()
                    gf.close()
                except:
                    names = []
                for idx, name in enumerate(names):
                    print('/%s' % name)
                    if idx >= 50 and '-v0' in unknown_args:
                        remaining = len(names) - 50
                        if remaining:
                            printinfo(
                                '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                                % remaining)
                        break

        #
        if '--from' in unknown_args:
            for item in unknown_args[(unknown_args.index('--from') + 1):]:
                prefix, surfix = os.path.splitext(os.path.basename(item))
                if surfix in ['.gdat', '.h5', '.hdf5']:
                    runCommand(
                        'h5copy -v -i {0} -o {1} -s "/{2}" -d "/{2}"'.format(
                            item, fn, re.sub(r'[^a-zA-Z0-9_]', '_', prefix)),
                        accepted_rc=[0, 1])
                    if '-v0' not in unknown_args:
                        printinfo('File {} processed!'.format(item))
        if '--to' in unknown_args:
            target = unknown_args[(unknown_args.index('--to') + 1):]
            target = target[0] if target else os.path.splitext(fn)[0]
            runCommand('mkdir -p {}'.format(target))
            gf = GFile(fn)
            names = gf.getnames()
            gf.close()
            for name in names:
                dat = GData(fn, name)
                if '-v0' not in unknown_args:
                    printinfo('Saving files {}'.format(
                        os.path.join(target, '{}.*.txt'.format(name))))
                dat.decompress()
                for key in dat:
                    np.savetxt(os.path.join(target,
                                            '{}.{}.txt'.format(name, key)),
                               dat[key],
                               fmt='%s',
                               delimiter='\t')
        show_gdat()
    else:
        raise ValueError('Unsupported file type {}'.format(fn))
    return
Example #9
def showFields(fn, border, unknown_args):
    def set_style(pt):
        if border == "less":
            pt.set_style(MSWORD_FRIENDLY)
        if border == "no":
            pt.set_style(PLAIN_COLUMNS)

    #
    if fn.endswith(".csv"):
        pt = from_csv(openFile(fn), delimiter=",")
        set_style(pt)
        header = [x for x in pt.field_names if not x.startswith("_")]
        if len(unknown_args) == 0:
            print("\n".join(header))
            return
        fields = [re.compile(item.replace("*", "(.*?)")) if "*" in item else item for item in unknown_args]
        output = []
        for item in fields:
            if type(item) is str:
                item = [x for x in header if x == item]
            else:
                item = [x for x in header if re.match(item, x)]
            output.extend(item)
        print(pt.get_string(fields=output))
    elif fn.endswith(".SEQPowerDB"):
        if not os.path.isfile(fn):
            raise OSError("Cannot find {}".format(fn))
        rs = ResultManager(fn)
        pt = PrettyTable()
        set_style(pt)
        # show tables
        if len(unknown_args) == 0:
            pt.add_column("TABLES", rs.get_tables())
            print(pt)
            return
        table = unknown_args[0]
        if table not in rs.get_tables():
            raise ValueError("Cannot find table '{}'".format(table))
        if "--debug" in unknown_args:
            debug = True
            unknown_args.pop(unknown_args.index("--debug"))
        else:
            debug = False
        if "--condition" in unknown_args:
            fields = unknown_args[1 : unknown_args.index("--condition")]
            condition = " ".join(unknown_args[(unknown_args.index("--condition") + 1) :])
        else:
            fields = unknown_args[1:]
            condition = None
        # show fields
        header = sorted(
            rs.get_fields(table), key=lambda x: x.replace("_", "|").replace("method", "AAA").replace("power", "AAB")
        )
        if len(fields) == 0:
            pt.add_column(table, header)
            pt.align[table] = "l"
            print(pt)
        else:
            names = [x for x in fields if x in header]
            select_query = "SELECT {} from {} {}".format(",".join(names), table, condition if condition else "")
            if debug:
                sys.stderr.write(select_query + "\n")
            pt.field_names = names
            for item in rs.cur.execute(select_query).fetchall():
                pt.add_row(item)
            print(pt)
    elif fn.split(".")[-1] in ["gdat", "h5", "hdf5"]:

        def show_gdat():
            if "-v2" in unknown_args:
                try:
                    print(runCommand("ptdump {}".format(fn)))
                except:
                    raise ValueError(
                        'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'.format(fn)
                    )
            else:
                try:
                    gf = GFile(fn)
                    names = gf.getnames()
                    gf.close()
                except:
                    names = []
                for idx, name in enumerate(names):
                    print("/%s" % name)
                    if idx >= 50 and "-v0" in unknown_args:
                        remaining = len(names) - 50
                        if remaining:
                            printinfo('%s more items not displayed. Use "-v1/-v2" switch to see more.' % remaining)
                        break

        #
        if "--from" in unknown_args:
            for item in unknown_args[(unknown_args.index("--from") + 1) :]:
                prefix, surfix = os.path.splitext(os.path.basename(item))
                if surfix in [".gdat", ".h5", ".hdf5"]:
                    runCommand(
                        'h5copy -v -i {0} -o {1} -s "/{2}" -d "/{2}"'.format(
                            item, fn, re.sub(r"[^a-zA-Z0-9_]", "_", prefix)
                        ),
                        accepted_rc=[0, 1],
                    )
                    if not "-v0" in unknown_args:
                        printinfo("File {} processed!".format(item))
        if "--to" in unknown_args:
            target = unknown_args[(unknown_args.index("--to") + 1) :]
            target = target[0] if target else os.path.splitext(fn)[0]
            runCommand("mkdir -p {}".format(target))
            gf = GFile(fn)
            names = gf.getnames()
            gf.close()
            for name in names:
                dat = GData(fn, name)
                if "-v0" not in unknown_args:
                    printinfo("Saving files {}".format(os.path.join(target, "{}.*.txt".format(name))))
                dat.decompress()
                for key in dat:
                    np.savetxt(os.path.join(target, "{}.{}.txt".format(name, key)), dat[key], fmt="%s", delimiter="\t")
        show_gdat()
    else:
        raise ValueError("Unsupported file type {}".format(fn))
    return
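The "--to" branch above exports every array stored under a unit to its own tab-delimited text file via np.savetxt. A self-contained sketch of that export loop follows; the dat dictionary and the 'exported_unit1' directory are made-up stand-ins for GData(fn, name) and the real target path, and only numpy is required.

import os
import numpy as np

target = 'exported_unit1'          # hypothetical output directory
os.makedirs(target, exist_ok=True)
dat = {'maf': np.array([0.01, 0.002]), 'position': np.array([101, 205])}
for key in dat:
    # one file per key, e.g. exported_unit1/unit1.maf.txt
    np.savetxt(os.path.join(target, 'unit1.{}.txt'.format(key)),
               dat[key], fmt='%s', delimiter='\t')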