def __init__(self, args, unknown_args):
    """Open the input data and prepare logging, results and progress display.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``verbosity``, ``output``, ``cmd``, ``data``,
            ``missing_unlisted``, ``limit``, ``append`` and ``replicates``.
        unknown_args: extra command-line arguments; stored unchanged as
            ``self.unknown_args``.
    """
    self.args = args
    self.unknown_args = unknown_args
    self.option = checkInput(args)
    # The logger verbosity is ``args.verbosity - 1`` clamped to [0, 2]; the
    # log file name is the first output name without its extension.
    # ``!=`` (not ``is not``) is used on purpose: identity comparison with an
    # int literal only works by accident of the interpreter's integer cache.
    # NOTE(review): ``fv`` is presumably the log-file verbosity -- confirm
    # against getLogger.
    env.logger = getLogger(max(min(args.verbosity - 1, 2), 0),
                           fn=os.path.splitext(args.output[0])[0],
                           fv=2 if args.verbosity != 0 else 0)
    # Record the command line, framed by a rule of at most 100 '=' characters.
    env.logger.debug('\n{0}\n{1}\n{0}'.format(
        "=" * min(len(args.cmd), 100), args.cmd))
    # At verbosity 1 messages go through printinfo instead of the logger.
    self.logger = env.logger.info if args.verbosity != 1 else printinfo
    #
    self.logger('Loading data from [{}] ...'.format(args.data))
    if self.option == 1:
        self.file = SFSFile(args.data)
    else:
        self.file = GFile(args.data)
    self.groups = self.file.getnames()
    self.logger('{:,d} units found'.format(len(self.groups)))
    # Load non-missing data, used to annotate for each variant position
    # whether or not it is missing from association analysis. It is named
    # chip_file because it mimics the behavior of exome chip design.
    if args.missing_unlisted:
        self.chip_file = SFSFile(args.missing_unlisted)
    else:
        self.chip_file = None
    # Set the number of units to analyze: a non-zero ``args.limit`` is
    # clamped to [1, number of units]; otherwise every unit is used.
    if self.args.limit:
        self.limit = min(max(1, args.limit), len(self.groups))
        self.logger('{:,d} units will be analyzed'.format(self.limit))
    else:
        self.limit = len(self.groups)
    self.result = ResultManager(args.output,
                                action='w' if not args.append else 'a')
    if self.args.verbosity == 1:
        widgets = [
            FormatLabel('scanning: unit %(value)d - '),
            Percentage(), ' ',
            Bar('>'), ' ',
            ETA()
        ]
        self.pbar = ProgressBar(widgets=widgets,
                                maxval=self.limit,
                                term_width=get_terminal_size()[0] - 5).start()
    else:
        # Use each group's own progress bar, or no progress bar at all.
        self.pbar = ProgressBarNull()
    # Buffer that collects every input dict in a list; only allocated when
    # ``args.replicates`` is negative.
    self.data_buffer = [] if self.args.replicates < 0 else None
def show_gdat():
    """Print the contents of the data file ``fn``.

    ``fn`` and ``unknown_args`` are not parameters; they are read from the
    surrounding scope.

    With ``-v2`` in ``unknown_args`` the output of ``ptdump`` on the file is
    printed. Otherwise each name returned by ``GFile(fn).getnames()`` is
    printed with a leading ``/``; with ``-v0`` the listing stops after 51
    names and reports how many were left out.

    Raises:
        ValueError: if the ``ptdump`` command fails in ``-v2`` mode.
    """
    if '-v2' in unknown_args:
        try:
            print(runCommand('ptdump {}'.format(fn)))
        except Exception:
            # Exception (not a bare except) so that Ctrl-C and SystemExit
            # are not rewritten into a misleading ValueError.
            raise ValueError(
                'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                .format(fn))
        return
    try:
        gf = GFile(fn)
        names = gf.getnames()
        gf.close()
    except Exception:
        # Deliberate best effort: a file that cannot be read lists nothing.
        names = []
    for idx, name in enumerate(names):
        print('/%s' % name)
        if idx >= 50 and '-v0' in unknown_args:
            # idx + 1 names have been printed so far; the previous
            # ``len(names) - 50`` over-counted the hidden items by one.
            remaining = len(names) - (idx + 1)
            if remaining:
                printinfo(
                    '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                    % remaining)
            break
def showFields(fn, border, unknown_args):
    """Print the content or layout of ``fn`` according to its extension.

    * ``.csv``: with no extra arguments, print the column names that do not
      start with ``_``; otherwise print the table restricted to the named
      columns (a ``*`` in a name acts as a wildcard).
    * ``.SEQPowerDB``: with no extra arguments, list the tables. Otherwise
      ``unknown_args[0]`` is a table name, followed by optional field names,
      an optional ``--debug`` flag and an optional ``--condition`` whose
      remaining words are appended to the SELECT statement.
    * ``.gdat`` / ``.h5`` / ``.hdf5``: optionally merge other files into
      ``fn`` (``--from``), export every group to text files (``--to``), then
      list the groups in the file.

    Args:
        fn: path of the file to show.
        border: table style selector; ``'less'`` and ``'no'`` pick
            alternative PrettyTable styles, any other value keeps the default.
        unknown_args: list of extra command-line arguments (not modified).

    Raises:
        OSError: if a ``.SEQPowerDB`` file does not exist.
        ValueError: if the requested table is not in the database, if
            ``ptdump`` fails in ``-v2`` mode, or if the file type is not
            supported.
    """

    def set_style(pt):
        # Apply the table style requested through ``border``.
        if border == 'less':
            pt.set_style(MSWORD_FRIENDLY)
        if border == 'no':
            pt.set_style(PLAIN_COLUMNS)

    #
    if fn.endswith('.csv'):
        pt = from_csv(openFile(fn), delimiter=',')
        set_style(pt)
        header = [x for x in pt.field_names if not x.startswith('_')]
        if len(unknown_args) == 0:
            print('\n'.join(header))
            return
        # Arguments containing '*' become regular expressions; the others
        # must equal a column name exactly.
        fields = [
            re.compile(item.replace('*', '(.*?)')) if '*' in item else item
            for item in unknown_args
        ]
        output = []
        for item in fields:
            if isinstance(item, str):
                output.extend(x for x in header if x == item)
            else:
                output.extend(x for x in header if re.match(item, x))
        print(pt.get_string(fields=output))
    elif fn.endswith('.SEQPowerDB'):
        if not os.path.isfile(fn):
            raise OSError('Cannot find {}'.format(fn))
        rs = ResultManager(fn)
        pt = PrettyTable()
        set_style(pt)
        # show tables
        if len(unknown_args) == 0:
            pt.add_column('TABLES', rs.get_tables())
            print(pt)
            return
        table = unknown_args[0]
        if table not in rs.get_tables():
            raise ValueError("Cannot find table '{}'".format(table))
        # Work on a copy so that dropping '--debug' does not alter the
        # caller's argument list.
        query_args = list(unknown_args)
        debug = '--debug' in query_args
        if debug:
            query_args.remove('--debug')
        if '--condition' in query_args:
            split_at = query_args.index('--condition')
            fields = query_args[1:split_at]
            condition = ' '.join(query_args[split_at + 1:])
        else:
            fields = query_args[1:]
            condition = None
        # show fields; the sort key places names containing 'method' and
        # 'power' ahead of the others.
        header = sorted(rs.get_fields(table),
                        key=lambda x: x.replace("_", "|").replace(
                            'method', 'AAA').replace('power', 'AAB'))
        if len(fields) == 0:
            pt.add_column(table, header)
            pt.align[table] = "l"
            print(pt)
        else:
            # Only fields that exist in the table are selected.
            names = [x for x in fields if x in header]
            # NOTE(review): ``condition`` is spliced into the SQL verbatim.
            # ``table`` and ``names`` are validated above, but the condition
            # is free text -- do not feed it untrusted input.
            select_query = "SELECT {} from {} {}".format(
                ','.join(names), table, condition if condition else '')
            if debug:
                sys.stderr.write(select_query + '\n')
            pt.field_names = names
            for item in rs.cur.execute(select_query).fetchall():
                pt.add_row(item)
            print(pt)
    elif fn.split('.')[-1] in ['gdat', 'h5', 'hdf5']:

        def show_gdat():
            # List the groups in ``fn``: full ``ptdump`` output with -v2,
            # otherwise one name per line (truncated under -v0).
            if '-v2' in unknown_args:
                try:
                    print(runCommand('ptdump {}'.format(fn)))
                except Exception:
                    # Exception (not a bare except) so that Ctrl-C and
                    # SystemExit are not rewritten into a ValueError.
                    raise ValueError(
                        'Cannot display summary information. Make sure "{}" exists and "ptdump" is installed'
                        .format(fn))
                return
            try:
                gf = GFile(fn)
                names = gf.getnames()
                gf.close()
            except Exception:
                # Deliberate best effort: an unreadable file lists nothing.
                names = []
            for idx, name in enumerate(names):
                print('/%s' % name)
                if idx >= 50 and '-v0' in unknown_args:
                    # idx + 1 names were printed; ``len(names) - 50``
                    # over-counted the hidden items by one.
                    remaining = len(names) - (idx + 1)
                    if remaining:
                        printinfo(
                            '%s more items not displayed. Use "-v1/-v2" switch to see more.'
                            % remaining)
                    break

        #
        if '--from' in unknown_args:
            # Copy each listed data file into ``fn`` as a group named after
            # the file's base name, with unsupported characters replaced
            # by '_'.
            for item in unknown_args[(unknown_args.index('--from') + 1):]:
                prefix, surfix = os.path.splitext(os.path.basename(item))
                if surfix in ['.gdat', '.h5', '.hdf5']:
                    runCommand(
                        'h5copy -v -i {0} -o {1} -s "/{2}" -d "/{2}"'.format(
                            item, fn, re.sub(r'[^a-zA-Z0-9_]', '_', prefix)),
                        accepted_rc=[0, 1])
                    if '-v0' not in unknown_args:
                        printinfo('File {} processed!'.format(item))
        if '--to' in unknown_args:
            # The export directory is the word after '--to', or ``fn``
            # without its extension when none is given.
            target = unknown_args[(unknown_args.index('--to') + 1):]
            target = target[0] if target else os.path.splitext(fn)[0]
            runCommand('mkdir -p {}'.format(target))
            gf = GFile(fn)
            names = gf.getnames()
            gf.close()
            # Iterate the names fetched while the file was open; the
            # previous code queried ``gf`` again after closing it.
            for name in names:
                dat = GData(fn, name)
                if '-v0' not in unknown_args:
                    printinfo('Saving files {}'.format(
                        os.path.join(target, '{}.*.txt'.format(name))))
                dat.decompress()
                for key in dat:
                    np.savetxt(os.path.join(target,
                                            '{}.{}.txt'.format(name, key)),
                               dat[key],
                               fmt='%s',
                               delimiter='\t')
        show_gdat()
    else:
        raise ValueError('Unsupported file type {}'.format(fn))
    return