def __init__(self, args, unknown_args):
    """Set up logging, input/output handles and the progress display.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``verbosity``, ``output``, ``cmd``, ``data``,
            ``missing_unlisted``, ``limit``, ``append`` and ``replicates``.
        unknown_args: extra arguments, stored unchanged on the instance.
    """
    self.args = args
    self.unknown_args = unknown_args
    # 1 selects the SFS reader below; any other value selects the GFile reader.
    self.option = checkInput(args)
    # The first logger argument is `verbosity - 1` clamped to [0, 2].
    # `!= 0` replaces the original `is not 0`: an identity test against an
    # int literal only works through CPython's small-int cache and raises a
    # SyntaxWarning on Python >= 3.8.
    env.logger = getLogger(
        max(min(args.verbosity - 1, 2), 0),
        fn=os.path.splitext(args.output[0])[0],
        fv=2 if args.verbosity != 0 else 0,
    )
    env.logger.debug('\n{0}\n{1}\n{0}'.format(
        "=" * min(len(args.cmd), 100), args.cmd))
    # At verbosity 1 messages go through `printinfo` instead of the logger.
    self.logger = env.logger.info if args.verbosity != 1 else printinfo
    self.logger('Loading data from [{}] ...'.format(args.data))
    if self.option == 1:
        self.file = SFSFile(args.data)
    else:
        self.file = GFile(args.data)
    self.groups = self.file.getnames()
    self.logger('{:,d} units found'.format(len(self.groups)))
    # Load non-missing data, used to annotate for each variant position
    # whether or not it is missing from association analysis. It is named
    # chip_file because it mimics the behavior of exome chip design.
    if args.missing_unlisted:
        self.chip_file = SFSFile(args.missing_unlisted)
    else:
        self.chip_file = None
    # Number of units to analyze: a requested limit is clamped to at least
    # 1 and at most the number of available units.
    if self.args.limit:
        self.limit = min(max(1, args.limit), len(self.groups))
        self.logger('{:,d} units will be analyzed'.format(self.limit))
    else:
        self.limit = len(self.groups)
    self.result = ResultManager(args.output,
                                action='w' if not args.append else 'a')
    if self.args.verbosity == 1:
        widgets = [
            FormatLabel('scanning: unit %(value)d - '),
            Percentage(), ' ', Bar('>'), ' ', ETA()
        ]
        self.pbar = ProgressBar(widgets=widgets,
                                maxval=self.limit,
                                term_width=get_terminal_size()[0] - 5).start()
    else:
        # use each group's progress bar or no progress bar at all
        self.pbar = ProgressBarNull()
    # Buffer collecting every input dict in one list; it is only created
    # when `replicates` is negative, otherwise units are handled one by one.
    self.data_buffer = [] if self.args.replicates < 0 else None
def __init__(self, args, unknown_args):
    """Set up logging, input/output handles and the progress display.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``verbosity``, ``output``, ``cmd``, ``data``,
            ``missing_unlisted``, ``limit``, ``append`` and ``replicates``.
        unknown_args: extra arguments, stored unchanged on the instance.
    """
    self.args = args
    self.unknown_args = unknown_args
    # 1 selects the SFS reader below; any other value selects the GFile reader.
    self.option = checkInput(args)
    # The first logger argument is `verbosity - 1` clamped to [0, 2].
    # `!= 0` replaces the original `is not 0`: an identity test against an
    # int literal only works through CPython's small-int cache and raises a
    # SyntaxWarning on Python >= 3.8.
    env.logger = getLogger(
        max(min(args.verbosity - 1, 2), 0),
        fn=os.path.splitext(args.output[0])[0],
        fv=2 if args.verbosity != 0 else 0,
    )
    env.logger.debug("\n{0}\n{1}\n{0}".format("=" * min(len(args.cmd), 100), args.cmd))
    # At verbosity 1 messages go through `printinfo` instead of the logger.
    self.logger = env.logger.info if args.verbosity != 1 else printinfo
    self.logger("Loading data from [{}] ...".format(args.data))
    if self.option == 1:
        self.file = SFSFile(args.data)
    else:
        self.file = GFile(args.data)
    self.groups = self.file.getnames()
    self.logger("{:,d} units found".format(len(self.groups)))
    # Load non-missing data, used to annotate for each variant position
    # whether or not it is missing from association analysis. It is named
    # chip_file because it mimics the behavior of exome chip design.
    if args.missing_unlisted:
        self.chip_file = SFSFile(args.missing_unlisted)
    else:
        self.chip_file = None
    # Number of units to analyze: a requested limit is clamped to at least
    # 1 and at most the number of available units.
    if self.args.limit:
        self.limit = min(max(1, args.limit), len(self.groups))
        self.logger("{:,d} units will be analyzed".format(self.limit))
    else:
        self.limit = len(self.groups)
    self.result = ResultManager(args.output, action="w" if not args.append else "a")
    if self.args.verbosity == 1:
        widgets = [FormatLabel("scanning: unit %(value)d - "), Percentage(), " ", Bar(">"), " ", ETA()]
        self.pbar = ProgressBar(
            widgets=widgets,
            maxval=self.limit,
            term_width=get_terminal_size()[0] - 5,
        ).start()
    else:
        # use each group's progress bar or no progress bar at all
        self.pbar = ProgressBarNull()
    # Buffer collecting every input dict in one list; it is only created
    # when `replicates` is negative, otherwise units are handled one by one.
    self.data_buffer = [] if self.args.replicates < 0 else None
class Executor:
    """Scan the units of an input data file and collect calculation results.

    Depending on the value returned by ``checkInput(args)`` the input is read
    through ``SFSFile`` (value 1) or ``GFile`` (any other value). Each unit
    that passes the ``def_valid_locus`` filter is either handed to a
    ``Calculator`` right away or, when ``args.replicates`` is negative,
    buffered so that one ``Calculator`` processes all units at the end.
    """

    def __init__(self, args, unknown_args):
        """Set up logging, input/output handles and the progress display.

        Args:
            args: parsed command-line namespace. The attributes read here are
                ``verbosity``, ``output``, ``cmd``, ``data``,
                ``missing_unlisted``, ``limit``, ``append`` and
                ``replicates``.
            unknown_args: extra arguments, forwarded to ``Calculator``.
        """
        self.args = args
        self.unknown_args = unknown_args
        self.option = checkInput(args)
        # The first logger argument is `verbosity - 1` clamped to [0, 2].
        # `!= 0` replaces the original `is not 0`: an identity test against
        # an int literal only works through CPython's small-int cache and
        # raises a SyntaxWarning on Python >= 3.8.
        env.logger = getLogger(
            max(min(args.verbosity - 1, 2), 0),
            fn=os.path.splitext(args.output[0])[0],
            fv=2 if args.verbosity != 0 else 0,
        )
        env.logger.debug('\n{0}\n{1}\n{0}'.format(
            "=" * min(len(args.cmd), 100), args.cmd))
        # At verbosity 1 messages go through `printinfo` instead of the logger.
        self.logger = env.logger.info if args.verbosity != 1 else printinfo
        self.logger('Loading data from [{}] ...'.format(args.data))
        if self.option == 1:
            self.file = SFSFile(args.data)
        else:
            self.file = GFile(args.data)
        self.groups = self.file.getnames()
        self.logger('{:,d} units found'.format(len(self.groups)))
        # Load non-missing data, used to annotate for each variant position
        # whether or not it is missing from association analysis. It is named
        # chip_file because it mimics the behavior of exome chip design.
        if args.missing_unlisted:
            self.chip_file = SFSFile(args.missing_unlisted)
        else:
            self.chip_file = None
        # Number of units to analyze: a requested limit is clamped to at
        # least 1 and at most the number of available units.
        if self.args.limit:
            self.limit = min(max(1, args.limit), len(self.groups))
            self.logger('{:,d} units will be analyzed'.format(self.limit))
        else:
            self.limit = len(self.groups)
        self.result = ResultManager(args.output,
                                    action='w' if not args.append else 'a')
        if self.args.verbosity == 1:
            widgets = [
                FormatLabel('scanning: unit %(value)d - '),
                Percentage(), ' ', Bar('>'), ' ', ETA()
            ]
            self.pbar = ProgressBar(
                widgets=widgets,
                maxval=self.limit,
                term_width=get_terminal_size()[0] - 5).start()
        else:
            # use each group's progress bar or no progress bar at all
            self.pbar = ProgressBarNull()
        # Buffer collecting every input dict in one list; it is only created
        # when `replicates` is negative.
        self.data_buffer = [] if self.args.replicates < 0 else None

    def run(self):
        """Scan the input, run the calculations and close all handles.

        If scanning raises, the result manager is closed quietly and the
        exception propagates. The input file and the optional chip file are
        closed in both the success and the failure case.
        """
        if self.data_buffer is not None and self.option == 0:
            if self.args.resampling:
                self.logger('[WARNING] Loading all genotype data to memory. '
                            'May fail if there is not enough memory!')
            else:
                self.logger('Converting data attributes ...')
        try:
            if self.option == 1:
                self.__scan_sfs()
            else:
                self.__scan_gdat()
        except BaseException:
            # Same reach as the original bare `except:`; always re-raised.
            self.result.close(quiet=True)
            raise
        finally:
            # Previously these were skipped when scanning failed, leaking
            # the open input handles.
            self.file.close()
            if self.chip_file is not None:
                self.chip_file.close()
        if self.data_buffer is not None:
            # Buffered mode: one calculation over all collected units.
            self.result.append(
                Calculator(self.args, self.unknown_args,
                           self.data_buffer).run())
        self.result.close()
        self.pbar.finish()

    def __scan_gdat(self):
        '''scan gdat file'''
        # Default column names; a `<data minus last 5 chars>.key` file may
        # remap them (first column: default name, second column: name used
        # in the data file).
        columns = {'maf': 'maf',
                   'position': 'position',
                   'annotation': 'annotation'}
        data_prefix = self.args.data[:-5]
        key_file = data_prefix + '.key'
        try:
            for default_name, custom_name in zip(getColumn(key_file, 1),
                                                 getColumn(key_file, 2)):
                if default_name in columns:
                    columns[default_name] = custom_name
        except Exception:
            # The key file is optional: fall back to the defaults when it is
            # absent or unreadable. Unlike the original bare `except:`, this
            # no longer swallows KeyboardInterrupt / SystemExit.
            pass
        #
        for group, item in enumerate(self.groups):
            if group >= self.limit:
                break
            data = self.file.getdata(item)
            if self.args.resampling:
                data.decompress()
            else:
                data['haplotype'] = [[]]
            try:
                loci_input = {
                    'pool': data['haplotype'],
                    'name': item,
                    'maf': list(data[columns['maf']]),
                    'pos': list(data[columns['position']]),
                    'function_score': list(data[columns['annotation']]),
                }
            except KeyError as e:
                message = ('Column name {} not found. Please provide '
                           '[{}.key] file to overwrite column naming '
                           'conventions.')
                env.logger.error(message.format(e, data_prefix))
                continue
            loci_input['num_variants'] = len(loci_input['maf'])
            if not self._annotate_missing(loci_input):
                # Skipped units do not advance the progress bar.
                continue
            self._submit(loci_input)
            self.pbar.update(group + 1)

    def __scan_sfs(self):
        """Scan the units of a text SFS file, up to ``self.limit`` of them."""
        for group, loci_input in enumerate(self.file.data):
            if group >= self.limit:
                break
            # text sfs file does not have any haplotype pools
            loci_input['pool'] = [[]]
            if not self._annotate_missing(loci_input):
                # Skipped units do not advance the progress bar.
                continue
            if loci_input['missing'] is not None:
                assert len(loci_input['missing']) == len(loci_input['maf'])
            self._submit(loci_input)
            self.pbar.update(group + 1)

    def _annotate_missing(self, loci_input):
        """Set ``loci_input['missing']`` from the chip file, if there is one.

        Without a chip file ``'missing'`` is set to ``None``. With one, it
        becomes a list holding ``True`` for every position of the unit that
        is not listed in the chip file entry of the same name.

        Returns:
            False when the unit must be skipped (no chip file entry, or the
            entry's variant count is outside ``def_valid_locus``), else True.
        """
        if not self.chip_file:
            loci_input['missing'] = None
            return True
        cdata = self.chip_file.getdata(loci_input['name'])
        if cdata is None or not is_within(cdata['num_variants'],
                                          self.args.def_valid_locus):
            return False
        listed = cdata['pos']
        loci_input['missing'] = [x not in listed for x in loci_input['pos']]
        return True

    def _submit(self, loci_input):
        """Calculate or buffer one unit if its variant count is acceptable."""
        if not is_within(loci_input['num_variants'],
                         self.args.def_valid_locus):
            return
        if self.data_buffer is None:
            self.result.append(
                Calculator(self.args, self.unknown_args, loci_input).run())
        else:
            self.data_buffer.append(loci_input)
class Executor:
    """Scan the units of an input data file and collect calculation results.

    Depending on the value returned by ``checkInput(args)`` the input is read
    through ``SFSFile`` (value 1) or ``GFile`` (any other value). Each unit
    that passes the ``def_valid_locus`` filter is either handed to a
    ``Calculator`` right away or, when ``args.replicates`` is negative,
    buffered so that one ``Calculator`` processes all units at the end.
    """

    def __init__(self, args, unknown_args):
        """Set up logging, input/output handles and the progress display.

        Args:
            args: parsed command-line namespace. The attributes read here are
                ``verbosity``, ``output``, ``cmd``, ``data``,
                ``missing_unlisted``, ``limit``, ``append`` and
                ``replicates``.
            unknown_args: extra arguments, forwarded to ``Calculator``.
        """
        self.args = args
        self.unknown_args = unknown_args
        self.option = checkInput(args)
        # The first logger argument is `verbosity - 1` clamped to [0, 2].
        # `!= 0` replaces the original `is not 0`: an identity test against
        # an int literal only works through CPython's small-int cache and
        # raises a SyntaxWarning on Python >= 3.8.
        env.logger = getLogger(
            max(min(args.verbosity - 1, 2), 0),
            fn=os.path.splitext(args.output[0])[0],
            fv=2 if args.verbosity != 0 else 0,
        )
        env.logger.debug('\n{0}\n{1}\n{0}'.format(
            "=" * min(len(args.cmd), 100), args.cmd))
        # At verbosity 1 messages go through `printinfo` instead of the logger.
        self.logger = env.logger.info if args.verbosity != 1 else printinfo
        self.logger('Loading data from [{}] ...'.format(args.data))
        if self.option == 1:
            self.file = SFSFile(args.data)
        else:
            self.file = GFile(args.data)
        self.groups = self.file.getnames()
        self.logger('{:,d} units found'.format(len(self.groups)))
        # Load non-missing data, used to annotate for each variant position
        # whether or not it is missing from association analysis. It is named
        # chip_file because it mimics the behavior of exome chip design.
        if args.missing_unlisted:
            self.chip_file = SFSFile(args.missing_unlisted)
        else:
            self.chip_file = None
        # Number of units to analyze: a requested limit is clamped to at
        # least 1 and at most the number of available units.
        if self.args.limit:
            self.limit = min(max(1, args.limit), len(self.groups))
            self.logger('{:,d} units will be analyzed'.format(self.limit))
        else:
            self.limit = len(self.groups)
        self.result = ResultManager(args.output,
                                    action='w' if not args.append else 'a')
        if self.args.verbosity == 1:
            widgets = [
                FormatLabel('scanning: unit %(value)d - '),
                Percentage(), ' ', Bar('>'), ' ', ETA()
            ]
            self.pbar = ProgressBar(
                widgets=widgets,
                maxval=self.limit,
                term_width=get_terminal_size()[0] - 5).start()
        else:
            # use each group's progress bar or no progress bar at all
            self.pbar = ProgressBarNull()
        # Buffer collecting every input dict in one list; it is only created
        # when `replicates` is negative.
        self.data_buffer = [] if self.args.replicates < 0 else None

    def run(self):
        """Scan the input, run the calculations and close all handles.

        If scanning raises, the result manager is closed quietly and the
        exception propagates. The input file and the optional chip file are
        closed in both the success and the failure case.
        """
        if self.data_buffer is not None and self.option == 0:
            if self.args.resampling:
                self.logger(
                    '[WARNING] Loading all genotype data to memory. May fail if there is not enough memory!'
                )
            else:
                self.logger('Converting data attributes ...')
        try:
            if self.option == 1:
                self.__scan_sfs()
            else:
                self.__scan_gdat()
        except BaseException:
            # Same reach as the original bare `except:`; always re-raised.
            self.result.close(quiet=True)
            raise
        finally:
            # Previously these were skipped when scanning failed, leaking
            # the open input handles.
            self.file.close()
            if self.chip_file is not None:
                self.chip_file.close()
        if self.data_buffer is not None:
            # Buffered mode: one calculation over all collected units.
            self.result.append(
                Calculator(self.args, self.unknown_args,
                           self.data_buffer).run())
        self.result.close()
        self.pbar.finish()

    def __scan_gdat(self):
        '''scan gdat file'''
        # Default column names; a `<data minus last 5 chars>.key` file may
        # remap them (first column: default name, second column: name used
        # in the data file).
        columns = {
            'maf': 'maf',
            'position': 'position',
            'annotation': 'annotation',
        }
        data_prefix = self.args.data[:-5]
        key_file = data_prefix + '.key'
        try:
            for default_name, custom_name in zip(getColumn(key_file, 1),
                                                 getColumn(key_file, 2)):
                if default_name in columns:
                    columns[default_name] = custom_name
        except Exception:
            # The key file is optional: fall back to the defaults when it is
            # absent or unreadable. Unlike the original bare `except:`, this
            # no longer swallows KeyboardInterrupt / SystemExit.
            pass
        #
        for group, item in enumerate(self.groups):
            if group >= self.limit:
                break
            data = self.file.getdata(item)
            if self.args.resampling:
                data.decompress()
            else:
                data['haplotype'] = [[]]
            try:
                loci_input = {
                    'pool': data['haplotype'],
                    'name': item,
                    'maf': list(data[columns['maf']]),
                    'pos': list(data[columns['position']]),
                    'function_score': list(data[columns['annotation']]),
                }
            except KeyError as e:
                message = ('Column name {} not found. Please provide '
                           '[{}.key] file to overwrite column naming '
                           'conventions.')
                env.logger.error(message.format(e, data_prefix))
                continue
            loci_input['num_variants'] = len(loci_input['maf'])
            if not self._annotate_missing(loci_input):
                # Skipped units do not advance the progress bar.
                continue
            self._submit(loci_input)
            self.pbar.update(group + 1)

    def __scan_sfs(self):
        """Scan the units of a text SFS file, up to ``self.limit`` of them."""
        for group, loci_input in enumerate(self.file.data):
            if group >= self.limit:
                break
            # text sfs file does not have any haplotype pools
            loci_input['pool'] = [[]]
            if not self._annotate_missing(loci_input):
                # Skipped units do not advance the progress bar.
                continue
            if loci_input['missing'] is not None:
                assert len(loci_input['missing']) == len(loci_input['maf'])
            self._submit(loci_input)
            self.pbar.update(group + 1)

    def _annotate_missing(self, loci_input):
        """Set ``loci_input['missing']`` from the chip file, if there is one.

        Without a chip file ``'missing'`` is set to ``None``. With one, it
        becomes a list holding ``True`` for every position of the unit that
        is not listed in the chip file entry of the same name.

        Returns:
            False when the unit must be skipped (no chip file entry, or the
            entry's variant count is outside ``def_valid_locus``), else True.
        """
        if not self.chip_file:
            loci_input['missing'] = None
            return True
        cdata = self.chip_file.getdata(loci_input['name'])
        if cdata is None or not is_within(cdata['num_variants'],
                                          self.args.def_valid_locus):
            return False
        listed = cdata['pos']
        loci_input['missing'] = [x not in listed for x in loci_input['pos']]
        return True

    def _submit(self, loci_input):
        """Calculate or buffer one unit if its variant count is acceptable."""
        if not is_within(loci_input['num_variants'],
                         self.args.def_valid_locus):
            return
        if self.data_buffer is None:
            self.result.append(
                Calculator(self.args, self.unknown_args, loci_input).run())
        else:
            self.data_buffer.append(loci_input)